package com.jayway.jsonpath.internal.path;

import com.jayway.jsonpath.InvalidPathException;
import com.jayway.jsonpath.Predicate;
import com.jayway.jsonpath.internal.CharacterIndex;
import com.jayway.jsonpath.internal.Path;
import com.jayway.jsonpath.internal.Utils;
import com.jayway.jsonpath.internal.filter.FilterCompiler;
import com.jayway.jsonpath.internal.function.ParamType;
import com.jayway.jsonpath.internal.function.Parameter;

import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;

import static java.lang.Character.isDigit;
import static java.util.Arrays.asList;

public class PathCompiler {

    private static final char DOC_CONTEXT = '$';
    private static final char EVAL_CONTEXT = '@';

    private static final char OPEN_SQUARE_BRACKET = '[';
    private static final char CLOSE_SQUARE_BRACKET = ']';
    private static final char OPEN_PARENTHESIS = '(';
    private static final char CLOSE_PARENTHESIS = ')';
    private static final char OPEN_BRACE = '{';
    private static final char CLOSE_BRACE = '}';

    private static final char WILDCARD = '*';
    private static final char PERIOD = '.';
    private static final char SPACE = ' ';
    private static final char TAB = '\t';
    private static final char CR = '\r';
    private static final char LF = '\n';
    private static final char BEGIN_FILTER = '?';
    private static final char COMMA = ',';
    private static final char SPLIT = ':';
    private static final char MINUS = '-';
    private static final char SINGLE_QUOTE = '\'';
    private static final char DOUBLE_QUOTE = '"';

    private final LinkedList<Predicate> filterStack;
    private final CharacterIndex path;

    private PathCompiler(String path, LinkedList<Predicate> filterStack){
        this(new CharacterIndex(path), filterStack);
    }

    private PathCompiler(CharacterIndex path, LinkedList<Predicate> filterStack){
        this.filterStack = filterStack;
        this.path = path;
    }

    private Path compile() {
        RootPathToken root = readContextToken();
        return new CompiledPath(root, root.getPathFragment().equals("$"));
    }

    public static Path compile(String path, final Predicate... filters) {
        try {
            CharacterIndex ci = new CharacterIndex(path);
            ci.trim();

            if(!( ci.charAt(0) == DOC_CONTEXT)  && !( ci.charAt(0) == EVAL_CONTEXT)){
                ci = new CharacterIndex("$." + path);
                ci.trim();
            }
            if(ci.lastCharIs('.')){
                fail("Path must not end with a '.' or '..'");
            }
            LinkedList<Predicate> filterStack = new LinkedList<Predicate>(asList(filters));
            Path p = new PathCompiler(ci, filterStack).compile();
            return p;
        } catch (Exception e) {
            InvalidPathException ipe;
            if (e instanceof InvalidPathException) {
                ipe = (InvalidPathException) e;
            } else {
                ipe = new InvalidPathException(e);
            }
            throw ipe;
        }
    }

    private void readWhitespace() {
        while (path.inBounds()) {
            char c = path.currentChar();
            if (!isWhitespace(c)) {
                break;
            }
            path.incrementPosition(1);
        }
    }

    private Boolean isPathContext(char c) {
        return (c == DOC_CONTEXT || c == EVAL_CONTEXT);
    }

    //[$ | @]
    private RootPathToken readContextToken() {

        readWhitespace();

        if (!isPathContext(path.currentChar())) {
            throw new InvalidPathException("Path must start with '$' or '@'");
        }

        RootPathToken pathToken = PathTokenFactory.createRootPathToken(path.currentChar());

        if (path.currentIsTail()) {
            return pathToken;
        }

        path.incrementPosition(1);

        if(path.currentChar() != PERIOD && path.currentChar() != OPEN_SQUARE_BRACKET){
            fail("Illegal character at position " + path.position() + " expected '.' or '[");
        }

        PathTokenAppender appender = pathToken.getPathTokenAppender();
        readNextToken(appender);

        return pathToken;
    }

    //
    //
    //
    private boolean readNextToken(PathTokenAppender appender) {

        char c = path.currentChar();

        switch (c) {
            case OPEN_SQUARE_BRACKET:
                return readBracketPropertyToken(appender) ||
                        readArrayToken(appender) ||
                        readWildCardToken(appender) ||
                        readFilterToken(appender) ||
                        readPlaceholderToken(appender) ||
                        fail("Could not parse token starting at position " + path.position() + ". Expected ?, ', 0-9, * ");
            case PERIOD:
                return readDotToken(appender) ||
                        fail("Could not parse token starting at position " + path.position());
            case WILDCARD:
                return readWildCardToken(appender) ||
                        fail("Could not parse token starting at position " + path.position());
            default:
                return readPropertyOrFunctionToken(appender) ||
                        fail("Could not parse token starting at position " + path.position());
        }
    }

    //
    // . and ..
    //
    private boolean readDotToken(PathTokenAppender appender) {
        if (path.currentCharIs(PERIOD) && path.nextCharIs(PERIOD)) {
            appender.appendPathToken(PathTokenFactory.crateScanToken());
            path.incrementPosition(2);
        } else if (!path.hasMoreCharacters()) {
            throw new InvalidPathException("Path must not end with a '.");
        } else {
            path.incrementPosition(1);
        }
        if(path.currentCharIs(PERIOD)){
            throw new InvalidPathException("Character '.' on position " + path.position() + " is not valid.");
        }
        return readNextToken(appender);
    }

    //
    // fooBar or fooBar()
    //
    private boolean readPropertyOrFunctionToken(PathTokenAppender appender) {
        if (path.currentCharIs(OPEN_SQUARE_BRACKET) || path.currentCharIs(WILDCARD) || path.currentCharIs(PERIOD) || path.currentCharIs(SPACE)) {
            return false;
        }
        int startPosition = path.position();
        int readPosition = startPosition;
        int endPosition = 0;

        boolean isFunction = false;

        while (path.inBounds(readPosition)) {
            char c = path.charAt(readPosition);
            if (c == SPACE) {
                throw new InvalidPathException("Use bracket notion ['my prop'] if your property contains blank characters. position: " + path.position());
            }
            else if (c == PERIOD || c == OPEN_SQUARE_BRACKET) {
                endPosition = readPosition;
                break;
            }
            else if (c == OPEN_PARENTHESIS) {
                isFunction = true;
                endPosition = readPosition++;
                break;
            }
            readPosition++;
        }
        if (endPosition == 0) {
            endPosition = path.length();
        }


        List<Parameter> functionParameters = null;
        if (isFunction) {
            if (path.inBounds(readPosition+1)) {
                // read the next token to determine if we have a simple no-args function call
                char c = path.charAt(readPosition + 1);
                if (c != CLOSE_PARENTHESIS) {
                    path.setPosition(endPosition+1);
                    // parse the arguments of the function - arguments that are inner queries or JSON document(s)
                    String functionName = path.subSequence(startPosition, endPosition).toString();
                    functionParameters = parseFunctionParameters(functionName);
                } else {
                    path.setPosition(readPosition + 1);
                }
            }
            else {
                path.setPosition(readPosition);
            }
        }
        else {
            path.setPosition(endPosition);
        }

        String property = path.subSequence(startPosition, endPosition).toString();
        if(isFunction){
            appender.appendPathToken(PathTokenFactory.createFunctionPathToken(property, functionParameters));
        } else {
            appender.appendPathToken(PathTokenFactory.createSinglePropertyPathToken(property, SINGLE_QUOTE));
        }

        return path.currentIsTail() || readNextToken(appender);
    }

    
Parse the parameters of a function call, either the caller has supplied JSON data, or the caller has supplied another path expression which must be evaluated and in turn invoked against the root document. In this tokenizer we're only concerned with parsing the path thus the output of this function is a list of parameters with the Path set if the parameter is an expression. If the parameter is a JSON document then the value of the cachedValue is set on the object. Sequence for parsing out the parameters: This code has its own tokenizer - it does some rudimentary level of lexing in that it can distinguish between JSON block parameters and sub-JSON blocks - it effectively regex's out the parameters into string blocks that can then be passed along to the appropriate parser. Since sub-jsonpath expressions can themselves contain other function calls this routine needs to be sensitive to token counting to determine the boundaries. Since the Path parser isn't aware of JSON processing this uber routine is needed. Parameters are separated by COMMAs ','
doc = {"numbers": [1,2,3,4,5,6,7,8,9,10]}
$.sum({10}, $.numbers.avg())
The above is a valid function call, we're first summing 10 + avg of 1...10 (5.5) so the total should be 15.5
Returns: An ordered list of parameters that are to processed via the function. Typically functions either process an array of values and/or can consume parameters in addition to the values provided from the consumption of an array.
/** * Parse the parameters of a function call, either the caller has supplied JSON data, or the caller has supplied * another path expression which must be evaluated and in turn invoked against the root document. In this tokenizer * we're only concerned with parsing the path thus the output of this function is a list of parameters with the Path * set if the parameter is an expression. If the parameter is a JSON document then the value of the cachedValue is * set on the object. * * Sequence for parsing out the parameters: * * This code has its own tokenizer - it does some rudimentary level of lexing in that it can distinguish between JSON block parameters * and sub-JSON blocks - it effectively regex's out the parameters into string blocks that can then be passed along to the appropriate parser. * Since sub-jsonpath expressions can themselves contain other function calls this routine needs to be sensitive to token counting to * determine the boundaries. Since the Path parser isn't aware of JSON processing this uber routine is needed. * * Parameters are separated by COMMAs ',' * * <pre> * doc = {"numbers": [1,2,3,4,5,6,7,8,9,10]} * * $.sum({10}, $.numbers.avg()) * </pre> * * The above is a valid function call, we're first summing 10 + avg of 1...10 (5.5) so the total should be 15.5 * * @return * An ordered list of parameters that are to processed via the function. Typically functions either process * an array of values and/or can consume parameters in addition to the values provided from the consumption of * an array. */
private List<Parameter> parseFunctionParameters(String funcName) { ParamType type = null; // Parenthesis starts at 1 since we're marking the start of a function call, the close paren will denote the // last parameter boundary Integer groupParen = 1, groupBracket = 0, groupBrace = 0, groupQuote = 0; Boolean endOfStream = false; char priorChar = 0; List<Parameter> parameters = new ArrayList<Parameter>(); StringBuffer parameter = new StringBuffer(); while (path.inBounds() && !endOfStream) { char c = path.currentChar(); path.incrementPosition(1); // we're at the start of the stream, and don't know what type of parameter we have if (type == null) { if (isWhitespace(c)) { continue; } if (c == OPEN_BRACE || isDigit(c) || DOUBLE_QUOTE == c) { type = ParamType.JSON; } else if (isPathContext(c)) { type = ParamType.PATH; // read until we reach a terminating comma and we've reset grouping to zero } } switch (c) { case DOUBLE_QUOTE: if (priorChar != '\\' && groupQuote > 0) { if (groupQuote == 0) { throw new InvalidPathException("Unexpected quote '\"' at character position: " + path.position()); } groupQuote--; } else { groupQuote++; } break; case OPEN_PARENTHESIS: groupParen++; break; case OPEN_BRACE: groupBrace++; break; case OPEN_SQUARE_BRACKET: groupBracket++; break; case CLOSE_BRACE: if (0 == groupBrace) { throw new InvalidPathException("Unexpected close brace '}' at character position: " + path.position()); } groupBrace--; break; case CLOSE_SQUARE_BRACKET: if (0 == groupBracket) { throw new InvalidPathException("Unexpected close bracket ']' at character position: " + path.position()); } groupBracket--; break; // In either the close paren case where we have zero paren groups left, capture the parameter, or where // we've encountered a COMMA do the same case CLOSE_PARENTHESIS: groupParen--; if (0 != groupParen) { parameter.append(c); } case COMMA: // In this state we've reach the end of a function parameter and we can pass along the parameter string // to the parser if ((0 == groupQuote && 0 == groupBrace && 0 == groupBracket && ((0 == groupParen && CLOSE_PARENTHESIS == c) || 1 == groupParen))) { endOfStream = (0 == groupParen); if (null != type) { Parameter param = null; switch (type) { case JSON: // parse the json and set the value param = new Parameter(parameter.toString()); break; case PATH: LinkedList<Predicate> predicates = new LinkedList<Predicate>(); PathCompiler compiler = new PathCompiler(parameter.toString(), predicates); param = new Parameter(compiler.compile()); break; } if (null != param) { parameters.add(param); } parameter.delete(0, parameter.length()); type = null; } } break; } if (type != null && !(c == COMMA && 0 == groupBrace && 0 == groupBracket && 1 == groupParen)) { parameter.append(c); } priorChar = c; } if (0 != groupBrace || 0 != groupParen || 0 != groupBracket) { throw new InvalidPathException("Arguments to function: '" + funcName + "' are not closed properly."); } return parameters; } private boolean isWhitespace(char c) { return (c == SPACE || c == TAB || c == LF || c == CR); } // // [?], [?,?, ..] // private boolean readPlaceholderToken(PathTokenAppender appender) { if (!path.currentCharIs(OPEN_SQUARE_BRACKET)) { return false; } int questionmarkIndex = path.indexOfNextSignificantChar(BEGIN_FILTER); if (questionmarkIndex == -1) { return false; } char nextSignificantChar = path.nextSignificantChar(questionmarkIndex); if (nextSignificantChar != CLOSE_SQUARE_BRACKET && nextSignificantChar != COMMA) { return false; } int expressionBeginIndex = path.position() + 1; int expressionEndIndex = path.nextIndexOf(expressionBeginIndex, CLOSE_SQUARE_BRACKET); if (expressionEndIndex == -1) { return false; } String expression = path.subSequence(expressionBeginIndex, expressionEndIndex).toString(); String[] tokens = expression.split(","); if (filterStack.size() < tokens.length) { throw new InvalidPathException("Not enough predicates supplied for filter [" + expression + "] at position " + path.position()); } Collection<Predicate> predicates = new ArrayList<Predicate>(); for (String token : tokens) { token = token != null ? token.trim() : token; if (!"?".equals(token == null ? "" : token)) { throw new InvalidPathException("Expected '?' but found " + token); } predicates.add(filterStack.pop()); } appender.appendPathToken(PathTokenFactory.createPredicatePathToken(predicates)); path.setPosition(expressionEndIndex + 1); return path.currentIsTail() || readNextToken(appender); } // // [?(...)] // private boolean readFilterToken(PathTokenAppender appender) { if (!path.currentCharIs(OPEN_SQUARE_BRACKET) && !path.nextSignificantCharIs(BEGIN_FILTER)) { return false; } int openStatementBracketIndex = path.position(); int questionMarkIndex = path.indexOfNextSignificantChar(BEGIN_FILTER); if (questionMarkIndex == -1) { return false; } int openBracketIndex = path.indexOfNextSignificantChar(questionMarkIndex, OPEN_PARENTHESIS); if (openBracketIndex == -1) { return false; } int closeBracketIndex = path.indexOfClosingBracket(openBracketIndex, true, true); if (closeBracketIndex == -1) { return false; } if (!path.nextSignificantCharIs(closeBracketIndex, CLOSE_SQUARE_BRACKET)) { return false; } int closeStatementBracketIndex = path.indexOfNextSignificantChar(closeBracketIndex, CLOSE_SQUARE_BRACKET); String criteria = path.subSequence(openStatementBracketIndex, closeStatementBracketIndex + 1).toString(); Predicate predicate = FilterCompiler.compile(criteria); appender.appendPathToken(PathTokenFactory.createPredicatePathToken(predicate)); path.setPosition(closeStatementBracketIndex + 1); return path.currentIsTail() || readNextToken(appender); } // // [*] // * // private boolean readWildCardToken(PathTokenAppender appender) { boolean inBracket = path.currentCharIs(OPEN_SQUARE_BRACKET); if (inBracket && !path.nextSignificantCharIs(WILDCARD)) { return false; } if (!path.currentCharIs(WILDCARD) && path.isOutOfBounds(path.position() + 1)) { return false; } if (inBracket) { int wildCardIndex = path.indexOfNextSignificantChar(WILDCARD); if (!path.nextSignificantCharIs(wildCardIndex, CLOSE_SQUARE_BRACKET)) { throw new InvalidPathException("Expected wildcard token to end with ']' on position " + wildCardIndex + 1); } int bracketCloseIndex = path.indexOfNextSignificantChar(wildCardIndex, CLOSE_SQUARE_BRACKET); path.setPosition(bracketCloseIndex + 1); } else { path.incrementPosition(1); } appender.appendPathToken(PathTokenFactory.createWildCardPathToken()); return path.currentIsTail() || readNextToken(appender); } // // [1], [1,2, n], [1:], [1:2], [:2] // private boolean readArrayToken(PathTokenAppender appender) { if (!path.currentCharIs(OPEN_SQUARE_BRACKET)) { return false; } char nextSignificantChar = path.nextSignificantChar(); if (!isDigit(nextSignificantChar) && nextSignificantChar != MINUS && nextSignificantChar != SPLIT) { return false; } int expressionBeginIndex = path.position() + 1; int expressionEndIndex = path.nextIndexOf(expressionBeginIndex, CLOSE_SQUARE_BRACKET); if (expressionEndIndex == -1) { return false; } String expression = path.subSequence(expressionBeginIndex, expressionEndIndex).toString().trim(); if ("*".equals(expression)) { return false; } //check valid chars for (int i = 0; i < expression.length(); i++) { char c = expression.charAt(i); if (!isDigit(c) && c != COMMA && c != MINUS && c != SPLIT && c != SPACE) { return false; } } boolean isSliceOperation = expression.contains(":"); if (isSliceOperation) { ArraySliceOperation arraySliceOperation = ArraySliceOperation.parse(expression); appender.appendPathToken(PathTokenFactory.createSliceArrayPathToken(arraySliceOperation)); } else { ArrayIndexOperation arrayIndexOperation = ArrayIndexOperation.parse(expression); appender.appendPathToken(PathTokenFactory.createIndexArrayPathToken(arrayIndexOperation)); } path.setPosition(expressionEndIndex + 1); return path.currentIsTail() || readNextToken(appender); } // // ['foo'] // private boolean readBracketPropertyToken(PathTokenAppender appender) { if (!path.currentCharIs(OPEN_SQUARE_BRACKET)) { return false; } char potentialStringDelimiter = path.nextSignificantChar(); if (potentialStringDelimiter != SINGLE_QUOTE && potentialStringDelimiter != DOUBLE_QUOTE) { return false; } List<String> properties = new ArrayList<String>(); int startPosition = path.position() + 1; int readPosition = startPosition; int endPosition = 0; boolean inProperty = false; boolean inEscape = false; boolean lastSignificantWasComma = false; while (path.inBounds(readPosition)) { char c = path.charAt(readPosition); if(inEscape){ inEscape = false; } else if('\\' == c){ inEscape = true; } else if (c == CLOSE_SQUARE_BRACKET && !inProperty) { if (lastSignificantWasComma){ fail("Found empty property at index "+readPosition); } break; } else if (c == potentialStringDelimiter) { if (inProperty && !inEscape) { char nextSignificantChar = path.nextSignificantChar(readPosition); if (nextSignificantChar != CLOSE_SQUARE_BRACKET && nextSignificantChar != COMMA) { fail("Property must be separated by comma or Property must be terminated close square bracket at index "+readPosition); } endPosition = readPosition; String prop = path.subSequence(startPosition, endPosition).toString(); properties.add(Utils.unescape(prop)); inProperty = false; } else { startPosition = readPosition + 1; inProperty = true; lastSignificantWasComma = false; } } else if (c == COMMA){ if (lastSignificantWasComma){ fail("Found empty property at index "+readPosition); } lastSignificantWasComma = true; } readPosition++; } int endBracketIndex = path.indexOfNextSignificantChar(endPosition, CLOSE_SQUARE_BRACKET) + 1; path.setPosition(endBracketIndex); appender.appendPathToken(PathTokenFactory.createPropertyPathToken(properties, potentialStringDelimiter)); return path.currentIsTail() || readNextToken(appender); } public static boolean fail(String message) { throw new InvalidPathException(message); } }