// copied and adapted from ANTLR 4 Java runtime /* * [The "BSD license"] * Copyright (c) 2012 Terence Parr * Copyright (c) 2012 Sam Harwell * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.antlr.intellij.adaptor.xpath; import com.intellij.lang.Language; import com.intellij.lang.parser.GeneratedParserUtilBase; import com.intellij.psi.PsiElement; import com.intellij.psi.PsiFile; import com.intellij.psi.PsiReference; import com.intellij.psi.impl.source.tree.CompositePsiElement; import org.antlr.intellij.adaptor.lexer.PSIElementTypeFactory; import org.antlr.intellij.adaptor.lexer.RuleIElementType; import org.antlr.intellij.adaptor.lexer.TokenIElementType; import org.antlr.intellij.adaptor.psi.Trees; import org.antlr.v4.runtime.ANTLRInputStream; import org.antlr.v4.runtime.CommonTokenStream; import org.antlr.v4.runtime.LexerNoViableAltException; import org.antlr.v4.runtime.Token; import org.antlr.v4.runtime.tree.xpath.XPathLexer; import org.jetbrains.annotations.NotNull; import java.io.IOException; import java.io.StringReader; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; /** * Represent a subset of XPath XML path syntax for use in identifying nodes in * parse trees. * * <p> * Split path into words and separators {@code /} and {@code //} via ANTLR * itself then walk path elements from left to right. At each separator-word * pair, find set of nodes. Next stage uses those as work list.</p> * * <p> * The basic interface is * {@link XPath#findAll}{@code (tree, pathString, parser)}. * But that is just shorthand for:</p> * * <pre> * {@link XPath} p = new {@link XPath#XPath XPath}(parser, pathString); * return p.{@link #evaluate evaluate}(tree); * </pre> * * <p> * See {@code org.antlr.v4.test.TestXPath} for descriptions. In short, this * allows operators:</p> * * <dl> * <dt>/</dt> <dd>root</dd> * <dt>//</dt> <dd>anywhere</dd> * <dt>!</dt> <dd>invert; this must appear directly after root or anywhere * operator</dd> * </dl> * * <p> * and path elements:</p> * * <dl> * <dt>ID</dt> <dd>token name</dd> * <dt>'string'</dt> <dd>any string literal token from the grammar</dd> * <dt>expr</dt> <dd>rule name</dd> * <dt>*</dt> <dd>wildcard matching any node</dd> * </dl> * * <p> * Whitespace is not allowed.</p> */ public class XPath { public static final String WILDCARD = "*"; // word not operator/separator public static final String NOT = "!"; // word for invert operator private final List<TokenIElementType> tokenElementTypes; private final List<RuleIElementType> ruleElementTypes; private final Map<String, Integer> ruleIndexes; private final Map<String, Integer> tokenTypes; protected String path; public XPath(Language language, String path) { this.path = path; this.tokenElementTypes = PSIElementTypeFactory.getTokenIElementTypes(language); this.ruleElementTypes = PSIElementTypeFactory.getRuleIElementTypes(language); this.ruleIndexes = PSIElementTypeFactory.getRuleNameToIndexMap(language); this.tokenTypes = PSIElementTypeFactory.getTokenNameToTypeMap(language); } // TODO: check for invalid token/rule names, bad syntax public XPathElement[] split(String path) { ANTLRInputStream in; try { in = new ANTLRInputStream(new StringReader(path)); } catch (IOException ioe) { throw new IllegalArgumentException("Could not read path: "+path, ioe); } XPathLexer lexer = new XPathLexer(in) { public void recover(LexerNoViableAltException e) { throw e; } }; lexer.removeErrorListeners(); lexer.addErrorListener(new XPathLexerErrorListener()); CommonTokenStream tokenStream = new CommonTokenStream(lexer); try { tokenStream.fill(); } catch (LexerNoViableAltException e) { int pos = lexer.getCharPositionInLine(); String msg = "Invalid tokens or characters at index "+pos+" in path '"+path+"'"; throw new IllegalArgumentException(msg, e); } List<Token> tokens = tokenStream.getTokens(); // System.out.println("path="+path+"=>"+tokens); List<XPathElement> elements = new ArrayList<XPathElement>(); int n = tokens.size(); int i=0; loop: while ( i<n ) { Token el = tokens.get(i); Token next = null; switch ( el.getType() ) { case XPathLexer.ROOT : case XPathLexer.ANYWHERE : boolean anywhere = el.getType() == XPathLexer.ANYWHERE; i++; next = tokens.get(i); boolean invert = next.getType()==XPathLexer.BANG; if ( invert ) { i++; next = tokens.get(i); } XPathElement pathElement = getXPathElement(next, anywhere); pathElement.invert = invert; elements.add(pathElement); i++; break; case XPathLexer.TOKEN_REF : case XPathLexer.RULE_REF : case XPathLexer.WILDCARD : elements.add( getXPathElement(el, false) ); i++; break; case Token.EOF : break loop; default : throw new IllegalArgumentException("Unknowth path element "+el); } } return elements.toArray(new XPathElement[0]); } /** * Convert word like {@code *} or {@code ID} or {@code expr} to a path * element. {@code anywhere} is {@code true} if {@code //} precedes the * word. */ protected XPathElement getXPathElement(Token wordToken, boolean anywhere) { if ( wordToken.getType()==Token.EOF ) { throw new IllegalArgumentException("Missing path element at end of path"); } String word = wordToken.getText(); Integer ttype = tokenTypes.get(word); Integer ruleIndex = ruleIndexes.get(word); switch ( wordToken.getType() ) { case XPathLexer.WILDCARD : return anywhere ? new XPathWildcardAnywhereElement() : new XPathWildcardElement(); case XPathLexer.TOKEN_REF : case XPathLexer.STRING : if ( ttype==null || ttype==Token.INVALID_TYPE ) { throw new IllegalArgumentException(word+ " at index "+ wordToken.getStartIndex()+ " isn't a valid token name"); } return anywhere ? new XPathTokenAnywhereElement(word, ttype) : new XPathTokenElement(word, ttype); default : if ( ruleIndex==null || ruleIndex==-1 ) { throw new IllegalArgumentException(word+ " at index "+ wordToken.getStartIndex()+ " isn't a valid rule name"); } return anywhere ? new XPathRuleAnywhereElement(word, ruleIndex) : new XPathRuleElement(word, ruleIndex); } } public static Collection<? extends PsiElement> findAll(Language language, PsiElement tree, String xpath) { XPath p = new XPath(language, xpath); XPathElement[] elements = p.split(xpath); return p.evaluate(tree, elements); } public static class DummyRoot extends CompositePsiElement { public final PsiElement child; public DummyRoot(PsiElement child) { super(GeneratedParserUtilBase.DUMMY_BLOCK); this.child = child; } @NotNull @Override public PsiElement[] getChildren() { return new PsiElement[] {child}; } @NotNull @Override public PsiReference[] getReferences() { return PsiReference.EMPTY_ARRAY; } @NotNull @Override public Language getLanguage() { return getParent().getLanguage(); } } /** * Return a list of all nodes starting at {@code t} as root that satisfy the * path. The root {@code /} is relative to the node passed to * {@link #evaluate}. */ public Collection<? extends PsiElement> evaluate(PsiElement t, XPathElement[] elements) { if ( t==null ) return Collections.emptyList(); if ( t instanceof PsiFile ) { // the PSI fileroot exists above start rule in ANTLR grammar and hence above ANTLR's parse tree root // drop t down to top of ANTLR's tree. Should be only child if we ignore WS, Comments t = Trees.getChildren(t)[0]; } PsiElement dummyRoot = new DummyRoot(t); // a dummy parent of t so we can initialize the work list Collection<PsiElement> work = Collections.singleton(dummyRoot); int i = 0; while ( i < elements.length ) { Collection<PsiElement> next = new LinkedHashSet<>(); for (PsiElement node : work) { if ( node.getChildren().length>0 ) { // only try to match next element if it has children // e.g., //func/*/stat might have a token node for which // we can't go looking for stat nodes. Collection<? extends PsiElement> matching = elements[i].evaluate(node); next.addAll(matching); } } i++; work = next; } return work; } }