package jdk.nashorn.internal.runtime.regexp.joni;
import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsOnOff;
import static jdk.nashorn.internal.runtime.regexp.joni.Option.isDontCaptureGroup;
import static jdk.nashorn.internal.runtime.regexp.joni.Option.isIgnoreCase;
import jdk.nashorn.internal.runtime.regexp.joni.ast.AnchorNode;
import jdk.nashorn.internal.runtime.regexp.joni.ast.AnyCharNode;
import jdk.nashorn.internal.runtime.regexp.joni.ast.BackRefNode;
import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode;
import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode.CCStateArg;
import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode;
import jdk.nashorn.internal.runtime.regexp.joni.ast.EncloseNode;
import jdk.nashorn.internal.runtime.regexp.joni.ast.Node;
import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode;
import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode;
import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
import jdk.nashorn.internal.runtime.regexp.joni.constants.CCSTATE;
import jdk.nashorn.internal.runtime.regexp.joni.constants.CCVALTYPE;
import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType;
import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType;
import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType;
import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
class Parser extends Lexer {
/** Regex this parser works for; taken from {@code env.reg} in the constructor. */
protected final Regex regex;
/** Root of the syntax tree produced by the most recent {@link #parse()} call. */
protected Node root;
/**
 * Side-channel result written by {@code parseEnclose} and read by {@code parseExp}:
 * 0 = a complete node was built, 1 = the result is the bare content of a
 * non-capturing group, 2 = an option-only enclose node that still needs a target.
 */
protected int returnCode;
/**
 * Creates a parser; all four arguments are forwarded unchanged to the {@code Lexer}
 * constructor.
 *
 * @param env   scan environment; also supplies the {@link Regex} stored in {@link #regex}
 * @param chars pattern characters
 * @param p     start position (presumably an index into {@code chars} - confirm in Lexer)
 * @param end   end position (presumably exclusive - confirm in Lexer)
 */
protected Parser(final ScanEnvironment env, final char[] chars, final int p, final int end) {
super(env, chars, p, end);
regex = env.reg;
}
/**
 * Parses the complete pattern, remembers the resulting tree in {@link #root} and
 * copies the memory (capture) count from the scan environment into the regex.
 *
 * @return the root node of the parsed pattern
 */
protected final Node parse() {
    final Node tree = parseRegexp();
    root = tree;
    regex.numMem = env.numMem;
    return tree;
}
/**
 * Looks ahead from the current read position for an occurrence of {@code code}.
 * The scan is bracketed by {@code mark()} / {@code restore()}, so the caller's
 * read position is put back before returning.
 *
 * <p>When {@code ignoreEscaped} is set, a character that directly follows the
 * syntax escape character is consumed without being compared, so an escaped
 * occurrence of {@code code} does not count as a match and an escaped escape
 * character does not start a new escape. Previously the escaped character was
 * not skipped and was still compared on the next iteration, which made the flag
 * ineffective.
 *
 * @param code          character to look for
 * @param ignoreEscaped whether escaped occurrences must be ignored
 * @return {@code true} if a qualifying occurrence exists in the remaining input
 */
private boolean codeExistCheck(final int code, final boolean ignoreEscaped) {
    mark();
    boolean inEsc = false;
    boolean found = false;
    while (!found && left()) {
        fetch();
        if (ignoreEscaped && inEsc) {
            // This character is escaped: it has been consumed above and is skipped.
            inEsc = false;
        } else if (c == code) {
            found = true;
        } else if (c == syntax.metaCharTable.esc) {
            inEsc = true;
        }
    }
    restore();
    return found;
}
/**
 * Parses a character class. Tokens are pulled with {@code fetchTokenInCC()} until a
 * {@code CC_CLOSE} token is seen; nested classes ({@code CC_CC_OPEN}) are handled by
 * recursion and merged with {@code or}, and {@code CC_AND} tokens split the class
 * into operands that are combined with {@code and}.
 *
 * @return the resulting character class node, negated when the class began with an
 *         unescaped {@code '^'}
 * @throws SyntaxException   for an empty class, an unmatched range operator, or a
 *                           pattern that ends inside the class
 * @throws InternalException if an unexpected token type is encountered
 */
private CClassNode parseCharClass() {
fetchTokenInCC();
final boolean neg;
// A leading, unescaped '^' negates the class.
if (token.type == TokenType.CHAR && token.getC() == '^' && !token.escaped) {
neg = true;
fetchTokenInCC();
} else {
neg = false;
}
// A close token in first position is only accepted when another ']' follows later
// in the pattern; it is then re-typed as an ordinary character of the class.
if (token.type == TokenType.CC_CLOSE) {
if (!codeExistCheck(']', true)) {
throw new SyntaxException(ERR_EMPTY_CHAR_CLASS);
}
env.ccEscWarn("]");
token.type = TokenType.CHAR;
}
// cc     : class currently being filled
// prevCC : accumulated result of the operands seen before a CC_AND token
// workCC : scratch class that becomes cc after the first CC_AND
CClassNode cc = new CClassNode();
CClassNode prevCC = null;
CClassNode workCC = null;
final CCStateArg arg = new CCStateArg();
boolean andStart = false;
arg.state = CCSTATE.START;
while (token.type != TokenType.CC_CLOSE) {
// Set when a branch has already fetched the next token itself, so the fetch at
// the bottom of the loop must be skipped.
boolean fetched = false;
switch (token.type) {
case CHAR:
// Values above 0xff are entered as code points, everything else as single bytes.
if (token.getC() > 0xff) {
arg.inType = CCVALTYPE.CODE_POINT;
} else {
arg.inType = CCVALTYPE.SB;
}
arg.v = token.getC();
arg.vIsRaw = false;
parseCharClassValEntry2(cc, arg);
break;
case RAW_BYTE:
arg.v = token.getC();
arg.inType = CCVALTYPE.SB;
arg.vIsRaw = true;
parseCharClassValEntry2(cc, arg);
break;
case CODE_POINT:
arg.v = token.getCode();
arg.vIsRaw = true;
parseCharClassValEntry(cc, arg);
break;
case CHAR_TYPE:
cc.addCType(token.getPropCType(), token.getPropNot(), env, this);
cc.nextStateClass(arg, env);
break;
case CC_RANGE:
// Meaning of the range operator depends on what preceded it (arg.state).
if (arg.state == CCSTATE.VALUE) {
// A value precedes the operator: look at the following token.
fetchTokenInCC();
fetched = true;
if (token.type == TokenType.CC_CLOSE) {
// Operator directly before the class close: '-' is entered as a literal.
parseCharClassRangeEndVal(cc, arg);
break;
} else if (token.type == TokenType.CC_AND) {
env.ccEscWarn("-");
parseCharClassRangeEndVal(cc, arg);
break;
}
// Otherwise the next value handled by the loop is the upper bound of a range.
arg.state = CCSTATE.RANGE;
} else if (arg.state == CCSTATE.START) {
// Operator at the very start of the class (or of an operand after CC_AND):
// it is entered as a literal value.
arg.v = token.getC();
arg.vIsRaw = false;
fetchTokenInCC();
fetched = true;
if (token.type == TokenType.CC_RANGE || andStart) {
env.ccEscWarn("-");
}
parseCharClassValEntry(cc, arg);
break;
} else if (arg.state == CCSTATE.RANGE) {
// Operator in the position of a range's upper bound: the current token's
// character is used as that bound.
env.ccEscWarn("-");
parseCharClassSbChar(cc, arg);
break;
} else {
// Remaining state: the operator does not follow a plain value.
fetchTokenInCC();
fetched = true;
if (token.type == TokenType.CC_CLOSE) {
parseCharClassRangeEndVal(cc, arg);
break;
} else if (token.type == TokenType.CC_AND) {
env.ccEscWarn("-");
parseCharClassRangeEndVal(cc, arg);
break;
}
// When the syntax permits it, the operator is entered as a literal '-'.
if (syntax.allowDoubleRangeOpInCC()) {
env.ccEscWarn("-");
arg.inType = CCVALTYPE.SB;
arg.v = '-';
arg.vIsRaw = false;
parseCharClassValEntry2(cc, arg);
break;
}
throw new SyntaxException(ERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS);
}
break;
case CC_CC_OPEN:
// Nested class: parse it recursively and union it into the current class.
final CClassNode acc = parseCharClass();
cc.or(acc);
break;
case CC_AND:
// Flush a pending value before closing the current operand.
if (arg.state == CCSTATE.VALUE) {
arg.v = 0;
arg.vIsRaw = false;
cc.nextStateValue(arg, env);
}
andStart = true;
arg.state = CCSTATE.START;
if (prevCC != null) {
prevCC.and(cc);
} else {
// First CC_AND: what was collected so far becomes the accumulated result and
// subsequent operands are collected in the scratch class.
prevCC = cc;
if (workCC == null) {
workCC = new CClassNode();
}
cc = workCC;
}
cc.clear();
break;
case EOT:
throw new SyntaxException(ERR_PREMATURE_END_OF_CHAR_CLASS);
default:
throw new InternalException(ERR_PARSER_BUG);
}
if (!fetched) {
fetchTokenInCC();
}
}
// Flush a value that is still pending when the class closes.
if (arg.state == CCSTATE.VALUE) {
arg.v = 0;
arg.vIsRaw = false;
cc.nextStateValue(arg, env);
}
// Fold the last operand into the accumulated result, if CC_AND was used.
if (prevCC != null) {
prevCC.and(cc);
cc = prevCC;
}
if (neg) {
cc.setNot();
} else {
cc.clearNot();
}
// For syntaxes with notNewlineInNegativeCC, bit 0x0a is set in the bit set of a
// negated, non-empty class when 0x0a is classified as a newline.
if (cc.isNot() && syntax.notNewlineInNegativeCC()) {
if (!cc.isEmpty()) {
final int NEW_LINE = 0x0a;
if (EncodingHelper.isNewLine(NEW_LINE)) {
cc.bs.set(NEW_LINE);
}
}
}
return cc;
}
/**
 * Enters the current token's character into {@code cc} as a non-raw, single-byte
 * value.
 *
 * @param cc  character class being built
 * @param arg shared parse state; its value fields are overwritten
 */
private void parseCharClassSbChar(final CClassNode cc, final CCStateArg arg) {
    arg.vIsRaw = false;
    arg.v = token.getC();
    arg.inType = CCVALTYPE.SB;
    parseCharClassValEntry2(cc, arg);
}
/**
 * Enters a literal {@code '-'} into {@code cc}; used when a range operator turns out
 * not to introduce a range.
 *
 * @param cc  character class being built
 * @param arg shared parse state; its value fields are overwritten
 */
private void parseCharClassRangeEndVal(final CClassNode cc, final CCStateArg arg) {
    arg.vIsRaw = false;
    arg.v = '-';
    parseCharClassValEntry(cc, arg);
}
/**
 * Derives the input type from the value already stored in {@code arg.v} (code point
 * above 0xff, single byte otherwise) and then enters the value into {@code cc}.
 *
 * @param cc  character class being built
 * @param arg shared parse state holding the value to enter
 */
private void parseCharClassValEntry(final CClassNode cc, final CCStateArg arg) {
    if (arg.v > 0xff) {
        arg.inType = CCVALTYPE.CODE_POINT;
    } else {
        arg.inType = CCVALTYPE.SB;
    }
    parseCharClassValEntry2(cc, arg);
}
/**
 * Hands the value described by {@code arg} (value, raw flag and input type must
 * already be set) to {@code cc.nextStateValue}.
 *
 * @param cc  character class being built
 * @param arg shared parse state describing the value to enter
 */
private void parseCharClassValEntry2(final CClassNode cc, final CCStateArg arg) {
cc.nextStateValue(arg, env);
}
/**
 * Parses a parenthesised construct. It is invoked by {@code parseExp} for a
 * {@code SUBEXP_OPEN} token and inspects the characters that follow for a
 * {@code '?'} group prefix.
 *
 * <p>The outcome is reported through {@link #returnCode} in addition to the
 * returned node:
 * <ul>
 *   <li>0 - a complete anchor or enclose node with its target attached;</li>
 *   <li>1 - the bare sub-expression of a non-capturing group
 *       ({@code (?:...)} or any group when the don't-capture option is set);</li>
 *   <li>2 - an option-only node ({@code (?i)} style) whose target must still be
 *       supplied by the caller.</li>
 * </ul>
 *
 * <p>Two defects of the earlier version are corrected here: the prefix
 * {@code (?'} used to leave the node unset and failed later with a
 * {@code NullPointerException} (it is now rejected as an undefined group option),
 * and {@code (?<} at the very end of the pattern used to fetch without checking
 * that input was left.
 *
 * @param term token type that terminates the enclosed sub-expression
 * @return the parsed node, see the {@code returnCode} values above
 * @throws SyntaxException if the pattern ends inside the group or the group
 *                         prefix is not recognised
 * @throws ValueException  if a capture-history group number exceeds the bit
 *                         status capacity
 */
private Node parseEnclose(final TokenType term) {
    Node node = null;
    if (!left()) {
        throw new SyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS);
    }
    int option = env.option;
    if (peekIs('?') && syntax.op2QMarkGroupEffect()) {
        inc();
        if (!left()) {
            throw new SyntaxException(ERR_END_PATTERN_IN_GROUP);
        }
        fetch();
        switch (c) {
        case ':':
            // Non-capturing group: hand back the content itself.
            fetchToken();
            node = parseSubExp(term);
            returnCode = 1;
            return node;
        case '=':
            node = new AnchorNode(AnchorType.PREC_READ);
            break;
        case '!':
            node = new AnchorNode(AnchorType.PREC_READ_NOT);
            break;
        case '>':
            node = new EncloseNode(EncloseType.STOP_BACKTRACK);
            break;
        case '<':
            // Guard the fetch the same way as the other fetches in this method.
            if (!left()) {
                throw new SyntaxException(ERR_END_PATTERN_IN_GROUP);
            }
            fetch();
            if (c == '=') {
                node = new AnchorNode(AnchorType.LOOK_BEHIND);
            } else if (c == '!') {
                node = new AnchorNode(AnchorType.LOOK_BEHIND_NOT);
            } else {
                throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
            }
            break;
        case '@':
            if (syntax.op2AtMarkCaptureHistory()) {
                final EncloseNode en = new EncloseNode();
                final int num = env.addMemEntry();
                if (num >= BitStatus.BIT_STATUS_BITS_NUM) {
                    throw new ValueException(ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY);
                }
                en.regNum = num;
                node = en;
            } else {
                throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
            }
            break;
        case '-':
        case 'i':
        case 'm':
        case 's':
        case 'x':
            // Option letters: consume them one by one until ')' or ':' is reached.
            boolean neg = false;
            while (true) {
                switch (c) {
                case ':':
                case ')':
                    break;
                case '-':
                    // Every option letter after '-' is switched off instead of on.
                    neg = true;
                    break;
                case 'x':
                    option = bsOnOff(option, Option.EXTEND, neg);
                    break;
                case 'i':
                    option = bsOnOff(option, Option.IGNORECASE, neg);
                    break;
                case 's':
                    if (syntax.op2OptionPerl()) {
                        option = bsOnOff(option, Option.MULTILINE, neg);
                    } else {
                        throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
                    }
                    break;
                case 'm':
                    if (syntax.op2OptionPerl()) {
                        // Perl-style 'm' toggles SINGLELINE with inverted polarity.
                        option = bsOnOff(option, Option.SINGLELINE, !neg);
                    } else if (syntax.op2OptionRuby()) {
                        option = bsOnOff(option, Option.MULTILINE, neg);
                    } else {
                        throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
                    }
                    break;
                default:
                    throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
                }
                if (c == ')') {
                    // Option-only group: the caller attaches the target.
                    final EncloseNode en = new EncloseNode(option, 0);
                    node = en;
                    returnCode = 2;
                    return node;
                } else if (c == ':') {
                    // Options scoped to this group: apply them while parsing the
                    // content, then restore the previous options.
                    final int prev = env.option;
                    env.option = option;
                    fetchToken();
                    final Node target = parseSubExp(term);
                    env.option = prev;
                    final EncloseNode en = new EncloseNode(option, 0);
                    en.setTarget(target);
                    node = en;
                    returnCode = 0;
                    return node;
                }
                if (!left()) {
                    throw new SyntaxException(ERR_END_PATTERN_IN_GROUP);
                }
                fetch();
            }
        default:
            // Includes the unsupported (?'...) prefix, which previously fell through
            // with node == null.
            throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
        }
    } else {
        if (isDontCaptureGroup(env.option)) {
            fetchToken();
            node = parseSubExp(term);
            returnCode = 1;
            return node;
        }
        // Ordinary capturing group: allocate the next memory slot.
        final EncloseNode en = new EncloseNode();
        final int num = env.addMemEntry();
        en.regNum = num;
        node = en;
    }
    fetchToken();
    final Node target = parseSubExp(term);
    if (node.getType() == NodeType.ANCHOR) {
        final AnchorNode an = (AnchorNode) node;
        an.setTarget(target);
    } else {
        final EncloseNode en = (EncloseNode) node;
        en.setTarget(target);
        if (en.type == EncloseType.MEMORY) {
            // Register capturing groups with the environment under their number.
            env.setMemNode(en.regNum, node);
        }
    }
    returnCode = 0;
    return node;
}
/**
 * Parses a single expression element starting at the current token, together with
 * any repeat operators that follow it.
 *
 * @param term token type that terminates the enclosing sub-expression
 * @return the parsed node; {@code StringNode.EMPTY} when the current token is
 *         {@code term}, {@code ALT} or {@code EOT}
 * @throws SyntaxException   for an unmatched close parenthesis or a repeat operator
 *                           without a target, when the syntax rejects them
 * @throws InternalException if an unexpected token or character type is encountered
 */
private Node parseExp(final TokenType term) {
if (token.type == term)
{
return StringNode.EMPTY;
}
Node node = null;
// Passed on to the quantifier handling; set when the element came from a
// non-capturing group (parseEnclose reported returnCode 1).
boolean group = false;
switch(token.type) {
case ALT:
case EOT:
return StringNode.EMPTY;
case SUBEXP_OPEN:
node = parseEnclose(TokenType.SUBEXP_CLOSE);
if (returnCode == 1) {
group = true;
} else if (returnCode == 2) {
// Option-only group: the remainder of the current sub-expression is parsed
// under the new options and becomes the target of the option node; the
// previous options are restored afterwards.
final int prev = env.option;
final EncloseNode en = (EncloseNode)node;
env.option = en.option;
fetchToken();
final Node target = parseSubExp(term);
env.option = prev;
en.setTarget(target);
return node;
}
break;
case SUBEXP_CLOSE:
// A close token that is not the expected terminator: either rejected or, when
// the syntax allows it, treated as literal text.
if (!syntax.allowUnmatchedCloseSubexp()) {
throw new SyntaxException(ERR_UNMATCHED_CLOSE_PARENTHESIS);
}
if (token.escaped) {
return parseExpTkRawByte(group);
}
return parseExpTkByte(group);
case STRING:
return parseExpTkByte(group);
case RAW_BYTE:
return parseExpTkRawByte(group);
case CODE_POINT:
// The code is narrowed to a single char and wrapped in a one-character string node.
final char[] buf = new char[] {(char)token.getCode()};
node = new StringNode(buf, 0, 1);
break;
case CHAR_TYPE:
switch(token.getPropCType()) {
case CharacterType.D:
case CharacterType.S:
case CharacterType.W:
// NOTE(review): when Config.NON_UNICODE_SDW is false, node stays null here -
// confirm the flag is always true or that this path is otherwise handled.
if (Config.NON_UNICODE_SDW) {
final CClassNode cc = new CClassNode();
cc.addCType(token.getPropCType(), false, env, this);
if (token.getPropNot()) {
cc.setNot();
}
node = cc;
}
break;
case CharacterType.SPACE:
case CharacterType.DIGIT:
case CharacterType.XDIGIT:
final CClassNode ccn = new CClassNode();
ccn.addCType(token.getPropCType(), false, env, this);
if (token.getPropNot()) {
ccn.setNot();
}
node = ccn;
break;
default:
throw new InternalException(ERR_PARSER_BUG);
}
break;
case CC_CC_OPEN:
final CClassNode cc = parseCharClass();
node = cc;
// With the ignore-case option, case folding is applied to the class; if that
// yields extra alternatives (arg.altRoot), the class and those alternatives
// are combined in an alternation node.
if (isIgnoreCase(env.option)) {
final ApplyCaseFoldArg arg = new ApplyCaseFoldArg(env, cc);
EncodingHelper.applyAllCaseFold(env.caseFoldFlag, ApplyCaseFold.INSTANCE, arg);
if (arg.altRoot != null) {
node = ConsAltNode.newAltNode(node, arg.altRoot);
}
}
break;
case ANYCHAR:
node = new AnyCharNode();
break;
case ANYCHAR_ANYTIME:
// An any-char node wrapped in a quantifier with bounds 0..REPEAT_INFINITE.
node = new AnyCharNode();
final QuantifierNode qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false);
qn.setTarget(node);
node = qn;
break;
case BACKREF:
final int backRef = token.getBackrefRef();
node = new BackRefNode(backRef, env);
break;
case ANCHOR:
node = new AnchorNode(token.getAnchor());
break;
case OP_REPEAT:
case INTERVAL:
// A repeat operator with nothing in front of it.
if (syntax.contextIndepRepeatOps()) {
if (syntax.contextInvalidRepeatOps()) {
throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED);
}
// The operator is applied to the empty string node by parseExpRepeat below.
node = StringNode.EMPTY;
} else {
// Otherwise the operator's text is treated as literal characters.
return parseExpTkByte(group);
}
break;
default:
throw new InternalException(ERR_PARSER_BUG);
}
// Advance past the element, then apply any repeat operators that follow it.
fetchToken();
return parseExpRepeat(node, group);
}
/**
 * Builds a string node from the current token's text and extends it with every
 * directly following {@code STRING} token, then applies any repeat operators.
 *
 * @param group forwarded to {@code parseExpRepeat}
 * @return the string node, possibly wrapped by quantifier nodes
 */
private Node parseExpTkByte(final boolean group) {
    final StringNode literal = new StringNode(chars, token.backP, p);
    for (fetchToken(); token.type == TokenType.STRING; fetchToken()) {
        if (token.backP != literal.end) {
            // Not contiguous with what the node already covers: append the text.
            literal.cat(chars, token.backP, p);
        } else {
            // Contiguous in the pattern buffer: just move the end marker.
            literal.end = p;
        }
    }
    return parseExpRepeat(literal, group);
}
/**
 * Builds a one-character string node from the current token's character, advances
 * to the next token and applies any repeat operators.
 *
 * <p>The raw flag is set before the token fetch and cleared right after it, so the
 * node handed to {@code parseExpRepeat} is not marked raw.
 *
 * @param group forwarded to {@code parseExpRepeat}
 * @return the string node, possibly wrapped by quantifier nodes
 */
private Node parseExpTkRawByte(final boolean group) {
    final char rawChar = (char) token.getC();
    final StringNode rawNode = new StringNode(rawChar);
    rawNode.setRaw();
    fetchToken();
    rawNode.clearRaw();
    return parseExpRepeat(rawNode, group);
}
/**
 * Applies every repeat operator ({@code OP_REPEAT} or {@code INTERVAL} token) that
 * follows {@code targetp}, wrapping the target once per operator.
 *
 * <p>{@code setQuantifier} decides how the operator is applied: result 0 means the
 * quantifier (or its possessive wrapper) replaces the target; result 2 means the
 * target and the quantifier are placed side by side in a list, and further
 * operators are applied to the second list element by
 * {@code parseExpRepeatForCar}. Any other result leaves the target unchanged.
 *
 * @param targetp node the first operator applies to
 * @param group   forwarded to {@code setQuantifier}
 * @return the resulting node once no repeat operator follows
 * @throws SyntaxException if the target cannot be quantified
 */
private Node parseExpRepeat(final Node targetp, final boolean group) {
    Node current = targetp;
    for (;;) {
        final boolean byInterval = token.type == TokenType.INTERVAL;
        if (!byInterval && token.type != TokenType.OP_REPEAT) {
            return current;
        }
        if (current.isInvalidQuantifier()) {
            throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID);
        }
        final QuantifierNode quantifier =
                new QuantifierNode(token.getRepeatLower(), token.getRepeatUpper(), byInterval);
        quantifier.greedy = token.getRepeatGreedy();
        final int ret = quantifier.setQuantifier(current, group, env, chars, getBegin(), getEnd());

        // A possessive operator wraps the quantifier in a stop-backtrack enclose.
        Node wrapped = quantifier;
        if (token.getRepeatPossessive()) {
            final EncloseNode atomic = new EncloseNode(EncloseType.STOP_BACKTRACK);
            atomic.setTarget(wrapped);
            wrapped = atomic;
        }

        switch (ret) {
        case 0:
            current = wrapped;
            break;
        case 2: {
            final ConsAltNode list = ConsAltNode.newListNode(current, null);
            final ConsAltNode tail = list.setCdr(ConsAltNode.newListNode(wrapped, null));
            fetchToken();
            return parseExpRepeatForCar(list, tail, group);
        }
        default:
            break;
        }
        fetchToken();
    }
}
/**
 * Continues repeat-operator handling after {@code parseExpRepeat} has split its
 * target into a list: each further operator is applied to {@code target.car},
 * while {@code top} is returned unchanged as the overall result.
 *
 * @param top    head of the list built by {@code parseExpRepeat}
 * @param target list cell whose {@code car} receives the operators
 * @param group  forwarded to {@code setQuantifier}
 * @return {@code top}
 * @throws SyntaxException if the current {@code car} cannot be quantified
 */
private Node parseExpRepeatForCar(final Node top, final ConsAltNode target, final boolean group) {
    while (true) {
        final boolean byInterval = token.type == TokenType.INTERVAL;
        if (!byInterval && token.type != TokenType.OP_REPEAT) {
            break;
        }
        if (target.car.isInvalidQuantifier()) {
            throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID);
        }
        final QuantifierNode quantifier =
                new QuantifierNode(token.getRepeatLower(), token.getRepeatUpper(), byInterval);
        quantifier.greedy = token.getRepeatGreedy();
        final int ret = quantifier.setQuantifier(target.car, group, env, chars, getBegin(), getEnd());

        // A possessive operator wraps the quantifier in a stop-backtrack enclose.
        Node wrapped = quantifier;
        if (token.getRepeatPossessive()) {
            final EncloseNode atomic = new EncloseNode(EncloseType.STOP_BACKTRACK);
            atomic.setTarget(wrapped);
            wrapped = atomic;
        }

        switch (ret) {
        case 0:
            target.setCar(wrapped);
            break;
        case 2:
            // A second split is not expected at this point.
            assert false;
            break;
        default:
            break;
        }
        fetchToken();
    }
    return top;
}
/**
 * Parses one alternation branch: a sequence of expressions up to {@code EOT},
 * {@code term} or {@code ALT}.
 *
 * @param term token type that terminates the enclosing sub-expression
 * @return the single expression if the branch has only one, otherwise the head of a
 *         list node chaining all expressions of the branch
 */
private Node parseBranch(final TokenType term) {
    final Node first = parseExp(term);
    if (token.type == TokenType.EOT || token.type == term || token.type == TokenType.ALT) {
        return first;
    }

    final ConsAltNode head = ConsAltNode.newListNode(first, null);
    ConsAltNode tail = head;
    while (!(token.type == TokenType.EOT || token.type == term || token.type == TokenType.ALT)) {
        final Node next = parseExp(term);
        if (next.getType() != NodeType.LIST) {
            tail.setCdr(ConsAltNode.newListNode(next, null));
            tail = tail.cdr;
            continue;
        }
        // The expression is itself a list: splice it in and move to its last cell.
        ConsAltNode last = (ConsAltNode) next;
        tail.setCdr(last);
        while (last.cdr != null) {
            last = last.cdr;
        }
        tail = last;
    }
    return head;
}
/**
 * Parses a sub-expression: one branch, or several branches separated by
 * {@code ALT} tokens, ending at {@code term}.
 *
 * @param term token type expected after the last branch
 * @return the single branch, or the head of an alternation node chaining all
 *         branches
 * @throws SyntaxException   if a parenthesised sub-expression is not closed
 * @throws InternalException if any other terminator is missing
 */
private Node parseSubExp(final TokenType term) {
    final Node firstBranch = parseBranch(term);
    if (token.type == term) {
        return firstBranch;
    }
    if (token.type != TokenType.ALT) {
        parseSubExpError(term);
        return null; // not reached: parseSubExpError always throws
    }

    final ConsAltNode head = ConsAltNode.newAltNode(firstBranch, null);
    ConsAltNode tail = head;
    // The current token is known to be ALT here, so at least one more branch follows.
    do {
        fetchToken();
        final Node branch = parseBranch(term);
        tail.setCdr(ConsAltNode.newAltNode(branch, null));
        tail = tail.cdr;
    } while (token.type == TokenType.ALT);

    if (token.type != term) {
        parseSubExpError(term);
    }
    return head;
}
/**
 * Reports a sub-expression that did not end with its expected terminator. This
 * method never returns normally.
 *
 * @param term the terminator that was expected
 * @throws SyntaxException   if a closing parenthesis was expected
 * @throws InternalException for any other expected terminator
 */
private static void parseSubExpError(final TokenType term) {
    if (term != TokenType.SUBEXP_CLOSE) {
        throw new InternalException(ERR_PARSER_BUG);
    }
    throw new SyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS);
}
/**
 * Entry point for parsing the complete pattern: fetches the first token and parses
 * a sub-expression that must end at {@code EOT}.
 *
 * @return the root node of the pattern
 */
private Node parseRegexp() {
fetchToken();
return parseSubExp(TokenType.EOT);
}
}