// Java source code of ANTLRParseTreeToPSIConverter

package org.antlr.intellij.adaptor.parser;

import com.intellij.lang.Language;
import com.intellij.lang.PsiBuilder;
import com.intellij.openapi.progress.ProgressIndicatorProvider;
import org.antlr.intellij.adaptor.lexer.PSIElementTypeFactory;
import org.antlr.intellij.adaptor.lexer.RuleIElementType;
import org.antlr.intellij.adaptor.lexer.TokenIElementType;
import org.antlr.v4.runtime.ANTLRErrorListener;
import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.tree.ErrorNode;
import org.antlr.v4.runtime.tree.ParseTreeListener;
import org.antlr.v4.runtime.tree.TerminalNode;

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/** Builds an IntelliJ PSI tree from an ANTLR parse tree.
 *  We let the ANTLR parser build its kind of ParseTree and then
 *  we convert to a PSI tree in one go using a standard ANTLR ParseTreeListener:
 *  every rule entry/exit opens/closes a {@link PsiBuilder.Marker} and every
 *  terminal advances the builder's lexer.
 *
 *  The list of SyntaxError objects is pulled from the parser's
 *  {@link SyntaxErrorListener}s and used for error message highlighting
 *  (error nodes don't have the info).
 */
public class ANTLRParseTreeToPSIConverter implements ParseTreeListener {
	protected final Language language;
	protected final PsiBuilder builder;

	/** Errors reported by the most recently seen {@link SyntaxErrorListener};
	 *  stays {@code null} when the parser has no such listener registered.
	 */
	protected List<SyntaxError> syntaxErrors;

	/** Stack of open rule markers: pushed on rule entry, popped on rule exit. */
	protected final Deque<PsiBuilder.Marker> markers = new ArrayDeque<>();

	protected final List<TokenIElementType> tokenElementTypes;
	protected final List<RuleIElementType> ruleElementTypes;

	/** Map an error's start char index (usually start of a token) to the error object. */
	protected Map<Integer, SyntaxError> tokenToErrorMap = new HashMap<>();

	/** Creates a converter and indexes the syntax errors already collected by the parser.
	 *
	 *  @param language language whose token/rule element types are looked up
	 *                  through {@link PSIElementTypeFactory}
	 *  @param parser   parser whose {@link SyntaxErrorListener}s supply the syntax errors
	 *  @param builder  PSI builder that receives markers and lexer advances
	 */
	public ANTLRParseTreeToPSIConverter(Language language, Parser parser, PsiBuilder builder) {
		this.language = language;
		this.builder = builder;

		this.tokenElementTypes = PSIElementTypeFactory.getTokenIElementTypes(language);
		this.ruleElementTypes = PSIElementTypeFactory.getRuleIElementTypes(language);

		for (ANTLRErrorListener listener : parser.getErrorListeners()) {
			if (listener instanceof SyntaxErrorListener) {
				syntaxErrors = ((SyntaxErrorListener)listener).getSyntaxErrors();
				for (SyntaxError error : syntaxErrors) {
					// record first error per token; later errors at the same
					// start index are intentionally ignored
					int startIndex = error.getOffendingSymbol().getStartIndex();
					if ( !tokenToErrorMap.containsKey(startIndex) ) {
						tokenToErrorMap.put(startIndex, error);
					}
				}
			}
		}
	}

	/** @return the language passed to the constructor */
	protected final Language getLanguage() {
		return language;
	}

	/** @return the PSI builder passed to the constructor */
	protected final PsiBuilder getBuilder() {
		return builder;
	}

	/** @return the live stack of currently open rule markers (not a copy) */
	protected final Deque<PsiBuilder.Marker> getMarkers() {
		return markers;
	}

	/** @return the token element types obtained from {@link PSIElementTypeFactory} */
	protected final List<TokenIElementType> getTokenElementTypes() {
		return tokenElementTypes;
	}

	/** @return the rule element types obtained from {@link PSIElementTypeFactory} */
	protected final List<RuleIElementType> getRuleElementTypes() {
		return ruleElementTypes;
	}

	/** Advances the builder's lexer by one token for each terminal node visited. */
	@Override
	public void visitTerminal(TerminalNode node) {
		builder.advanceLexer();
	}

	/** Converts an ANTLR error node into a PSI error element (or a plain token).
	 *
	 *  For any syntax error thrown by the parser, there will be an
	 *  ErrorNode in the parse tree and this method will process it.
	 *  All errors correspond to actual tokens in the input except for
	 *  missing token errors.
	 *
	 *  There are multiple error situations to consider:
	 *
	 *  1. Extraneous token. The parse tree will have an ErrorNode for token.
	 *
	 *  2. Token mismatch. The parse tree will have an ErrorNode for token.
	 *
	 *  3. Missing token. The parse tree will have an ErrorNode but
	 *     it does not correspond to any bit of the input. We underline
	 *     the current token.
	 *
	 *  4. NoViableAlt (input inconsistent with any rule alt).
	 *     The parse tree will have an ErrorNode for token.
	 *
	 *  5. Tokens consumed to resync the parser during recovery.
	 *     The parse tree will have an ErrorNode for each token.
	 *
	 *  This is complicated by errors that occur at EOF but I have
	 *  modified error strategy to add error nodes for EOF if needed.
	 *
	 *  Another complication. During prediction, we might match n
	 *  tokens and then fail on the n+1 token, leading to NoViableAltException.
	 *  But, its offending token is at n+1 not current token where
	 *  prediction started (which we use to find syntax errors). So,
	 *  SyntaxError objects return start not offending token in this case.
	 *
	 *  @param node the error node; its symbol's start index is the key used
	 *              to look up the recorded {@link SyntaxError}
	 */
	@Override
	public void visitErrorNode(ErrorNode node) {
		ProgressIndicatorProvider.checkCanceled();

		Token badToken = node.getSymbol();
		// a negative token index marks a token that is not part of the token stream
		boolean isConjuredToken = badToken.getTokenIndex()<0;
		int nodeStartIndex = badToken.getStartIndex();
		SyntaxError error = tokenToErrorMap.get(nodeStartIndex);

		if ( error!=null ) {
			PsiBuilder.Marker errorMarker = builder.mark();
			if ( nodeStartIndex>=0 &&
				 badToken.getType()!=Token.EOF &&
				 !isConjuredToken )
			{
				// we advance lexer if error occurred at a real token
				// Missing tokens should highlight the token at the missing position
				// but can't consume a token that does not exist.
				builder.advanceLexer();
			}
			String message = String.format("%s%n", error.getMessage());
			errorMarker.error(message);
		}
		else {
			if ( isConjuredToken ) {
				// no recorded error: emit an empty error element carrying the token text
				PsiBuilder.Marker errorMarker = builder.mark();
				errorMarker.error(badToken.getText()); // says "<missing X>" or similar
			}
			else {
				// must be a real token consumed during recovery; just consume w/o highlighting it as an error
				builder.advanceLexer();
			}
		}
	}

	/** Opens a marker at the current builder position and pushes it for the matching rule exit. */
	@Override
	public void enterEveryRule(ParserRuleContext ctx) {
		ProgressIndicatorProvider.checkCanceled();
		markers.push(getBuilder().mark());
	}

	/** Pops the marker opened on rule entry and completes it with the element
	 *  type found at the context's rule index in the rule element type list.
	 */
	@Override
	public void exitEveryRule(ParserRuleContext ctx) {
		ProgressIndicatorProvider.checkCanceled();
		PsiBuilder.Marker marker = markers.pop();
		marker.done(getRuleElementTypes().get(ctx.getRuleIndex()));
	}
}