package org.jruby.ext.ripper;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.jcodings.Encoding;
import org.jruby.Ruby;
import org.jruby.lexer.LexerSource;
import org.jruby.util.ByteList;
import org.jruby.util.RegexpOptions;
import static org.jruby.lexer.LexingCommon.*;
/**
 * Lexer state for a delimited literal (string, word list, regexp, ...) used by the Ripper lexer.
 * The behaviour of an instance is driven by the STR_FUNC_* bits stored in {@link #flags}.
 */
public class StringTerm extends StrTerm {
// Bit set of STR_FUNC_* options. Not final: STR_FUNC_TERM gets set and STR_FUNC_LIST gets cleared while lexing.
private int flags;
// Opening delimiter; '\0' disables nesting (parseStringIntoBuffer only counts it when it is not '\0').
private final char begin;
// Closing delimiter that ends the literal when seen at nesting depth zero.
private final char end;
// Number of currently unmatched opening delimiters seen inside the literal.
private int nest;
// Content segments collected for a regexp literal; only allocated when STR_FUNC_REGEXP is set.
private List<ByteList> regexpFragments;
// Set when a regexp literal produced an interpolation token, which skips the whole-literal syntax check.
private boolean regexpDynamic;
// Encoding of the most recently filled buffer; assigned at the end of parseStringIntoBuffer.
protected Encoding encodingOut;
public StringTerm(int flags, int begin, int end) {
this.flags = flags;
this.begin = (char) begin;
this.end = (char) end;
this.nest = 0;
if ((flags & STR_FUNC_REGEXP) != 0) {
this.regexpFragments = new ArrayList<>();
}
}
/**
 * Returns the current STR_FUNC_* bit set of this literal.
 *
 * @return the flags, including any bits changed while lexing
 */
public int getFlags() {
    return this.flags;
}
/**
 * Creates the empty buffer that will receive the next content segment.
 *
 * @param lexer lexer whose current encoding is given to the new buffer
 * @return a new ByteList backed by {@code ByteList.NULL_ARRAY} and tagged with the lexer's encoding
 */
protected ByteList createByteList(RipperLexer lexer) {
    final Encoding sourceEncoding = lexer.getEncoding();
    return new ByteList(ByteList.NULL_ARRAY, sourceEncoding);
}
/**
 * Handles the closing delimiter being reached at nesting depth zero.
 *
 * @param lexer the lexer to update (state, current string terminator, scan events)
 * @return {@code ' '} for word lists (the real end token is deferred to the next
 *         {@link #parseString} call via STR_FUNC_TERM), otherwise tREGEXP_END, tLABEL_END or
 *         tSTRING_END
 * @throws IOException if the lexer fails to read
 */
private int endFound(RipperLexer lexer) throws IOException {
    final boolean wordList = (flags & STR_FUNC_QWORDS) != 0;
    if (wordList) {
        // Emit one more separator first; the terminator itself is reported on the next call.
        flags |= STR_FUNC_TERM;
        lexer.pushback(0);
        lexer.addDelayedToken(lexer.tokp, lexer.lex_p);
        return ' ';
    }

    lexer.setStrTerm(null);

    final int token;
    if ((flags & STR_FUNC_REGEXP) != 0) {
        validateRegexp(lexer);
        lexer.dispatchScanEvent(RipperParser.tREGEXP_END);
        lexer.setState(EXPR_END | EXPR_ENDARG);
        token = RipperParser.tREGEXP_END;
    } else if ((flags & STR_FUNC_LABEL) != 0 && lexer.isLabelSuffix()) {
        // Consume the character that makes this a label end.
        lexer.nextc();
        lexer.setState(EXPR_BEG | EXPR_LABEL);
        token = RipperParser.tLABEL_END;
    } else {
        lexer.setState(EXPR_END | EXPR_ENDARG);
        token = RipperParser.tSTRING_END;
    }
    return token;
}
/**
 * Checks the collected regexp segments once the closing delimiter has been seen, then resets
 * the per-literal regexp bookkeeping.
 *
 * @param lexer lexer used to parse the trailing option letters and to run the checks
 * @throws IOException if the lexer fails to read
 */
private void validateRegexp(RipperLexer lexer) throws IOException {
    final Ruby runtime = lexer.getRuntime();
    final RegexpOptions options = lexer.parseRegexpFlags();

    final int fragmentCount = regexpFragments.size();
    for (int i = 0; i < fragmentCount; i++) {
        lexer.checkRegexpFragment(runtime, regexpFragments.get(i), options);
    }

    // A full syntax check is only done for a literal made of exactly one static segment.
    final boolean singleStaticFragment = !regexpDynamic && fragmentCount == 1;
    if (singleStaticFragment) {
        lexer.checkRegexpSyntax(runtime, regexpFragments.get(0), options);
    }

    regexpFragments.clear();
    regexpDynamic = false;
}
/**
 * Produces the next token of the literal: a separator, an interpolation start, a content
 * segment, or the terminating token.
 *
 * @param lexer the lexer supplying characters and receiving state/value updates
 * @param src the source being lexed; only handed on to {@link #parseStringIntoBuffer}
 * @return {@code ' '} for a word-list separator, the token reported by
 *         {@code lexer.peekVariableName} for an interpolation, tSTRING_CONTENT for a content
 *         segment, or an end token (tSTRING_END, tREGEXP_END, tLABEL_END)
 * @throws IOException if the lexer fails to read
 */
@Override
public int parseString(RipperLexer lexer, LexerSource src) throws IOException {
    // The terminator was already detected on an earlier call (word list end or EOF).
    if ((flags & STR_FUNC_TERM) != 0) {
        if ((flags & STR_FUNC_QWORDS) != 0) {
            lexer.nextc();
        }
        lexer.setState(EXPR_END | EXPR_ENDARG);
        lexer.setStrTerm(null);
        if ((flags & STR_FUNC_REGEXP) != 0) {
            return RipperParser.tREGEXP_END;
        }
        return RipperParser.tSTRING_END;
    }

    final ByteList content = createByteList(lexer);
    int ch = lexer.nextc();
    boolean separatorSeen = false;

    // In word lists, runs of whitespace between words collapse into one separator token.
    if ((flags & STR_FUNC_QWORDS) != 0) {
        while (Character.isWhitespace(ch)) {
            separatorSeen = true;
            ch = lexer.nextc();
        }
    }

    // STR_FUNC_LIST is a one-shot request for a separator; clear it once honoured.
    if ((flags & STR_FUNC_LIST) != 0) {
        flags &= ~STR_FUNC_LIST;
        separatorSeen = true;
    }

    if (nest == 0 && ch == end) {
        return endFound(lexer);
    }

    if (separatorSeen) {
        lexer.pushback(ch);
        lexer.addDelayedToken(lexer.tokp, lexer.lex_p);
        return ' ';
    }

    if (ch == '#' && (flags & STR_FUNC_EXPAND) != 0) {
        final int interpolation = lexer.peekVariableName(RipperParser.tSTRING_DVAR, RipperParser.tSTRING_DBEG);
        if (interpolation == 0) {
            // Not an interpolation after all, so '#' is ordinary content.
            content.append(ch);
        } else {
            if ((flags & STR_FUNC_REGEXP) != 0) {
                regexpDynamic = true;
            }
            return interpolation;
        }
    }
    lexer.pushback(ch);

    final int last = parseStringIntoBuffer(lexer, src, content, lexer.getEncoding());
    if (last == EOF) {
        final boolean regexp = (flags & STR_FUNC_REGEXP) != 0;
        lexer.compile_error(regexp
                ? "unterminated regexp meets end of file"
                : "unterminated string meets end of file");
        // Make the next call emit the end token instead of reading further.
        flags |= STR_FUNC_TERM;
    }

    lexer.setValue(lexer.createStr(content, flags));
    if ((flags & STR_FUNC_REGEXP) != 0) {
        regexpFragments.add(content);
    }
    lexer.flush_string_content(encodingOut);
    return RipperParser.tSTRING_CONTENT;
}
/**
 * Reports that content in one encoding was mixed into a literal of another encoding.
 *
 * @param lexer lexer that receives the compile error
 * @param foundEncoding encoding the buffer ended up with (e.g. after a Unicode escape)
 * @param parserEncoding encoding the literal is being lexed in
 */
private void mixedEscape(RipperLexer lexer, Encoding foundEncoding, Encoding parserEncoding) {
    // The message names both encodings; previously foundEncoding was dropped, leaving a
    // message that started with a blank and had no subject.
    lexer.compile_error(foundEncoding + " mixed within " + parserEncoding);
}
/**
 * Copies literal content from the lexer into {@code buffer} until something ends the current
 * segment: the closing delimiter at nesting depth zero, the start of an interpolation
 * ({@code #$}, {@code #@} or <code>#{</code>) when STR_FUNC_EXPAND is set, unescaped whitespace in a
 * word list, or end of input. The character(s) that ended the segment are pushed back so the
 * caller sees them again.
 *
 * @param lexer the lexer supplying characters and receiving errors
 * @param src the source being lexed; only handed on to {@code parseEscapeIntoBuffer}
 * @param buffer destination for the segment's bytes
 * @param enc encoding the literal is being lexed in; compared against the buffer's encoding
 * @return the last character read, or EOF when input ran out, when a backslash was followed by
 *         EOF, or when an invalid multibyte character was reported
 * @throws IOException if the lexer fails to read
 */
public int parseStringIntoBuffer(RipperLexer lexer, LexerSource src, ByteList buffer, Encoding enc) throws IOException {
// Decode the option bits once; flags is not modified inside this method.
boolean qwords = (flags & STR_FUNC_QWORDS) != 0;
boolean expand = (flags & STR_FUNC_EXPAND) != 0;
boolean escape = (flags & STR_FUNC_ESCAPE) != 0;
boolean regexp = (flags & STR_FUNC_REGEXP) != 0;
boolean symbol = (flags & STR_FUNC_SYMBOL) != 0;
// Becomes true once a non-ASCII character has been copied; used to detect mixed encodings.
boolean hasNonAscii = false;
int c;
while ((c = lexer.nextc()) != EOF) {
if (lexer.getHeredocIndent() > 0) {
lexer.update_heredoc_indent(c);
}
// Delimiter bookkeeping: nested begin/end pairs stay part of the content.
if (begin != '\0' && c == begin) {
nest++;
} else if (c == end) {
if (nest == 0) {
// Closing delimiter of the literal itself: leave it for the caller.
lexer.pushback(c);
break;
}
nest--;
} else if (expand && c == '#' && !lexer.peek('\n')) {
// Possible interpolation: look one character ahead.
int c2 = lexer.nextc();
if (c2 == '$' || c2 == '@' || c2 == '{') {
// Un-read both characters so the caller can tokenize the interpolation.
lexer.pushback(c2);
lexer.pushback(c);
break;
}
// Plain '#': restore the lookahead and fall through to append '#' below.
lexer.pushback(c2);
} else if (c == '\\') {
c = lexer.nextc();
switch (c) {
case '\n':
// Backslash-newline: kept as a newline in word lists, dropped in expanding
// literals, otherwise kept together with the backslash.
if (qwords) break;
if (expand) continue;
buffer.append('\\');
break;
case '\\':
// Escaped backslash: the extra backslash is only kept when STR_FUNC_ESCAPE is set.
if (escape) buffer.append(c);
break;
case 'u':
if (!expand) {
// Non-expanding literal: keep the backslash, 'u' is appended after the switch.
buffer.append('\\');
break;
}
if (regexp) {
lexer.readUTFEscapeRegexpLiteral(buffer);
} else {
lexer.readUTFEscape(buffer, true, symbol);
}
// The Unicode escape may have changed the buffer's encoding.
if (hasNonAscii && buffer.getEncoding() != enc) {
mixedEscape(lexer, buffer.getEncoding(), enc);
}
continue;
default:
if (c == EOF) return EOF;
// NOTE(review): isASCII() presumably tests the character just read — confirm in RipperLexer.
if (!lexer.isASCII()) {
if (!expand) buffer.append('\\');
hasNonAscii = true;
if (buffer.getEncoding() != enc) {
mixedEscape(lexer, buffer.getEncoding(), enc);
continue;
}
if (!lexer.tokenAddMBC(c, buffer)) {
lexer.compile_error("invalid multibyte char (" + enc + ")");
return EOF;
}
continue;
}
if (regexp) {
// An escaped closing delimiter that is not a regexp metacharacter is stored bare.
if (c == end && !simple_re_meta(c)) {
buffer.append(c);
continue;
}
// Any other escape is copied in its escaped form.
lexer.pushback(c);
parseEscapeIntoBuffer(lexer, src, buffer);
if (hasNonAscii && buffer.getEncoding() != enc) {
mixedEscape(lexer, buffer.getEncoding(), enc);
}
continue;
} else if (expand) {
// Let the lexer interpret the escape; its result is appended after the switch.
lexer.pushback(c);
if (escape) buffer.append('\\');
c = lexer.readEscape();
} else if (qwords && Character.isWhitespace(c)) {
// Escaped whitespace in a word list: intentionally empty, the backslash is dropped
// and the whitespace itself is appended after the switch.
} else if (c != end && !(begin != '\0' && c == begin)) {
// Backslash before an ordinary character is kept; before a delimiter it is dropped.
buffer.append('\\');
}
}
} else if (!lexer.isASCII()) {
// The label below is not referenced by any break or continue in this method.
nonascii: hasNonAscii = true;
if (buffer.getEncoding() != enc) {
mixedEscape(lexer, buffer.getEncoding(), enc);
continue;
}
if (!lexer.tokenAddMBC(c, buffer)) {
lexer.compile_error("invalid multibyte char (" + enc + ")");
return EOF;
}
continue;
} else if (qwords && Character.isWhitespace(c)) {
// Unescaped whitespace ends the current word; leave it for the caller.
lexer.pushback(c);
break;
}
// Common tail for every branch that did not continue/break: append the character.
if ((c & 0x80) != 0) {
hasNonAscii = true;
if (buffer.getEncoding() != enc) {
mixedEscape(lexer, buffer.getEncoding(), enc);
continue;
}
}
buffer.append(c);
}
// Only reached on normal loop exit; the early "return EOF" paths leave encodingOut untouched.
encodingOut = buffer.getEncoding();
return c;
}
/**
 * Tells whether {@code c} is one of the characters {@code $ * + . ? ^ | ) ] } >}.
 *
 * @param c the character to test
 * @return {@code true} if {@code c} is in that set, {@code false} otherwise
 */
private boolean simple_re_meta(int c) {
    return "$*+.?^|)]}>".indexOf(c) >= 0;
}
/**
 * Copies the character that follows a control/meta prefix ({@code \M-}, {@code \C-},
 * {@code \c}) into {@code buffer}. A backslash starts a nested escape, which is delegated to
 * {@code parseEscapeIntoBuffer}; any other character is appended as is.
 *
 * @param lexer the lexer supplying characters and receiving errors
 * @param src the source being lexed; only handed on to {@code parseEscapeIntoBuffer}
 * @param buffer destination for the copied bytes
 * @throws java.io.IOException if the lexer fails to read
 */
private void escaped(RipperLexer lexer, LexerSource src, ByteList buffer) throws java.io.IOException {
    int c;

    switch (c = lexer.nextc()) {
    case '\\':
        parseEscapeIntoBuffer(lexer, src, buffer);
        break;
    case EOF:
        lexer.compile_error("Invalid escape character syntax");
        // Stop here: without this break the EOF sentinel fell through to the default branch
        // and was appended to the buffer as a bogus byte.
        break;
    default:
        buffer.append(c);
    }
}
/**
 * Copies one backslash escape into {@code buffer} in its escaped (source) form; the leading
 * backslash has already been consumed by the caller. Within this class it is used for regexp
 * literals and for the character following a control/meta prefix.
 *
 * <ul>
 *   <li>backslash-newline: nothing is appended</li>
 *   <li>octal: backslash plus one to three octal digits</li>
 *   <li>{@code x}: backslash, {@code x} and one or two following characters</li>
 *   <li>{@code M-}, {@code C-}, {@code c}: the prefix, then whatever {@code escaped} copies</li>
 *   <li>anything else: backslash plus the character</li>
 * </ul>
 *
 * @param lexer the lexer supplying characters and receiving errors
 * @param src the source being lexed; only handed on to {@code escaped}
 * @param buffer destination for the copied bytes
 * @throws java.io.IOException if the lexer fails to read
 */
private void parseEscapeIntoBuffer(RipperLexer lexer, LexerSource src, ByteList buffer) throws java.io.IOException {
    int c;

    switch (c = lexer.nextc()) {
    case '\n':
        break; /* just ignore */
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
        buffer.append('\\');
        buffer.append(c);
        // Up to two more octal digits may follow the first one.
        for (int i = 0; i < 2; i++) {
            c = lexer.nextc();
            if (c == EOF) {
                lexer.compile_error("Invalid escape character syntax");
            }
            if (!isOctChar(c)) {
                lexer.pushback(c);
                break;
            }
            buffer.append(c);
        }
        break;
    case 'x':
        buffer.append('\\');
        buffer.append(c);
        c = lexer.nextc();
        if (!isHexChar(c)) {
            lexer.compile_error("Invalid escape character syntax");
        }
        buffer.append(c);
        // An optional second hex digit.
        c = lexer.nextc();
        if (isHexChar(c)) {
            buffer.append(c);
        } else {
            lexer.pushback(c);
        }
        break;
    case 'M':
        if ((lexer.nextc()) != '-') {
            lexer.compile_error("Invalid escape character syntax");
        }
        buffer.append(new byte[] { '\\', 'M', '-' });
        escaped(lexer, src, buffer);
        break;
    case 'C':
        if ((lexer.nextc()) != '-') {
            lexer.compile_error("Invalid escape character syntax");
        }
        buffer.append(new byte[] { '\\', 'C', '-' });
        escaped(lexer, src, buffer);
        break;
    case 'c':
        buffer.append(new byte[] { '\\', 'c' });
        escaped(lexer, src, buffer);
        break;
    case EOF:
        lexer.compile_error("Invalid escape character syntax");
        // Stop here: without this break the EOF sentinel fell through to the default branch
        // and was appended to the buffer (with a backslash) as a bogus byte.
        break;
    default:
        // NOTE(review): this condition is only false when both c and the closing delimiter
        // are a backslash, so the backslash is re-added in practically every case. Possibly
        // '&&' was intended; kept unchanged to preserve existing output — confirm.
        if (c != '\\' || c != end) buffer.append('\\');
        buffer.append(c);
    }
}
}