/*
* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
package org.antlr.v4.runtime.tree.xpath;
import org.antlr.v4.runtime.ANTLRInputStream;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.LexerNoViableAltException;
import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.tree.ParseTree;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
Represent a subset of XPath XML path syntax for use in identifying nodes in
parse trees.
Split path into words and separators /
and //
via ANTLR itself then walk path elements from left to right. At each separator-word pair, find set of nodes. Next stage uses those as work list.
The basic interface is ParseTree.findAll
(tree, pathString, parser)
. But that is just shorthand for:
XPath
p = new XPath
(parser, pathString); return p.evaluate
(tree);
See org.antlr.v4.test.TestXPath
for descriptions. In short, this allows operators:
- /
- root
- //
- anywhere
- !
- invert; this must appear directly after root or anywhere
operator
and path elements:
- ID
- token name
- 'string'
- any string literal token from the grammar
- expr
- rule name
- *
- wildcard matching any node
Whitespace is not allowed.
/**
* Represent a subset of XPath XML path syntax for use in identifying nodes in
* parse trees.
*
* <p>
* Split path into words and separators {@code /} and {@code //} via ANTLR
* itself then walk path elements from left to right. At each separator-word
* pair, find set of nodes. Next stage uses those as work list.</p>
*
* <p>
* The basic interface is
* {@link XPath#findAll ParseTree.findAll}{@code (tree, pathString, parser)}.
* But that is just shorthand for:</p>
*
* <pre>
* {@link XPath} p = new {@link XPath#XPath XPath}(parser, pathString);
* return p.{@link #evaluate evaluate}(tree);
* </pre>
*
* <p>
* See {@code org.antlr.v4.test.TestXPath} for descriptions. In short, this
* allows operators:</p>
*
* <dl>
* <dt>/</dt> <dd>root</dd>
* <dt>//</dt> <dd>anywhere</dd>
* <dt>!</dt> <dd>invert; this must appear directly after root or anywhere
* operator</dd>
* </dl>
*
* <p>
* and path elements:</p>
*
* <dl>
* <dt>ID</dt> <dd>token name</dd>
* <dt>'string'</dt> <dd>any string literal token from the grammar</dd>
* <dt>expr</dt> <dd>rule name</dd>
* <dt>*</dt> <dd>wildcard matching any node</dd>
* </dl>
*
* <p>
* Whitespace is not allowed.</p>
*/
public class XPath {
public static final String WILDCARD = "*"; // word not operator/separator
public static final String NOT = "!"; // word for invert operator
protected String path;
protected XPathElement[] elements;
protected Parser parser;
public XPath(Parser parser, String path) {
this.parser = parser;
this.path = path;
elements = split(path);
// System.out.println(Arrays.toString(elements));
}
// TODO: check for invalid token/rule names, bad syntax
public XPathElement[] split(String path) {
ANTLRInputStream in;
try {
in = new ANTLRInputStream(new StringReader(path));
}
catch (IOException ioe) {
throw new IllegalArgumentException("Could not read path: "+path, ioe);
}
XPathLexer lexer = new XPathLexer(in) {
@Override
public void recover(LexerNoViableAltException e) { throw e; }
};
lexer.removeErrorListeners();
lexer.addErrorListener(new XPathLexerErrorListener());
CommonTokenStream tokenStream = new CommonTokenStream(lexer);
try {
tokenStream.fill();
}
catch (LexerNoViableAltException e) {
int pos = lexer.getCharPositionInLine();
String msg = "Invalid tokens or characters at index "+pos+" in path '"+path+"'";
throw new IllegalArgumentException(msg, e);
}
List<Token> tokens = tokenStream.getTokens();
// System.out.println("path="+path+"=>"+tokens);
List<XPathElement> elements = new ArrayList<XPathElement>();
int n = tokens.size();
int i=0;
loop:
while ( i<n ) {
Token el = tokens.get(i);
Token next = null;
switch ( el.getType() ) {
case XPathLexer.ROOT :
case XPathLexer.ANYWHERE :
boolean anywhere = el.getType() == XPathLexer.ANYWHERE;
i++;
next = tokens.get(i);
boolean invert = next.getType()==XPathLexer.BANG;
if ( invert ) {
i++;
next = tokens.get(i);
}
XPathElement pathElement = getXPathElement(next, anywhere);
pathElement.invert = invert;
elements.add(pathElement);
i++;
break;
case XPathLexer.TOKEN_REF :
case XPathLexer.RULE_REF :
case XPathLexer.WILDCARD :
elements.add( getXPathElement(el, false) );
i++;
break;
case Token.EOF :
break loop;
default :
throw new IllegalArgumentException("Unknowth path element "+el);
}
}
return elements.toArray(new XPathElement[0]);
}
Convert word like *
or ID
or expr
to a path element. anywhere
is true
if //
precedes the word. /**
* Convert word like {@code *} or {@code ID} or {@code expr} to a path
* element. {@code anywhere} is {@code true} if {@code //} precedes the
* word.
*/
protected XPathElement getXPathElement(Token wordToken, boolean anywhere) {
if ( wordToken.getType()==Token.EOF ) {
throw new IllegalArgumentException("Missing path element at end of path");
}
String word = wordToken.getText();
int ttype = parser.getTokenType(word);
int ruleIndex = parser.getRuleIndex(word);
switch ( wordToken.getType() ) {
case XPathLexer.WILDCARD :
return anywhere ?
new XPathWildcardAnywhereElement() :
new XPathWildcardElement();
case XPathLexer.TOKEN_REF :
case XPathLexer.STRING :
if ( ttype==Token.INVALID_TYPE ) {
throw new IllegalArgumentException(word+
" at index "+
wordToken.getStartIndex()+
" isn't a valid token name");
}
return anywhere ?
new XPathTokenAnywhereElement(word, ttype) :
new XPathTokenElement(word, ttype);
default :
if ( ruleIndex==-1 ) {
throw new IllegalArgumentException(word+
" at index "+
wordToken.getStartIndex()+
" isn't a valid rule name");
}
return anywhere ?
new XPathRuleAnywhereElement(word, ruleIndex) :
new XPathRuleElement(word, ruleIndex);
}
}
public static Collection<ParseTree> findAll(ParseTree tree, String xpath, Parser parser) {
XPath p = new XPath(parser, xpath);
return p.evaluate(tree);
}
Return a list of all nodes starting at t
as root that satisfy the path. The root /
is relative to the node passed to evaluate
. /**
* Return a list of all nodes starting at {@code t} as root that satisfy the
* path. The root {@code /} is relative to the node passed to
* {@link #evaluate}.
*/
public Collection<ParseTree> evaluate(final ParseTree t) {
ParserRuleContext dummyRoot = new ParserRuleContext();
dummyRoot.children = Collections.singletonList(t); // don't set t's parent.
Collection<ParseTree> work = Collections.<ParseTree>singleton(dummyRoot);
int i = 0;
while ( i < elements.length ) {
Collection<ParseTree> next = new LinkedHashSet<ParseTree>();
for (ParseTree node : work) {
if ( node.getChildCount()>0 ) {
// only try to match next element if it has children
// e.g., //func/*/stat might have a token node for which
// we can't go looking for stat nodes.
Collection<? extends ParseTree> matching = elements[i].evaluate(node);
next.addAll(matching);
}
}
i++;
work = next;
}
return work;
}
}