/*
 * [The "BSD license"]
 *  Copyright (c) 2011 Terence Parr
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *  1. Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *     derived from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.stringtemplate.v4.compiler;

import org.antlr.runtime.CharStream;
import org.antlr.runtime.CommonToken;
import org.antlr.runtime.MismatchedTokenException;
import org.antlr.runtime.NoViableAltException;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.Token;
import org.antlr.runtime.TokenSource;
import org.stringtemplate.v4.STGroup;
import org.stringtemplate.v4.misc.ErrorManager;
import org.stringtemplate.v4.misc.Misc;
import java.util.ArrayList;
import java.util.List;

/**
 * This class represents the tokenizer for templates. It operates in two modes:
 * inside and outside of expressions. It implements the {@link TokenSource}
 * interface so it can be used with ANTLR parsers. Outside of expressions, we
 * can return these token types: {@link #TEXT}, {@link #INDENT}, {@link #LDELIM}
 * (start of expression), {@link #RCURLY} (end of subtemplate), and
 * {@link #NEWLINE}. Inside of an expression, this lexer returns all of the
 * tokens needed by {@link STParser}. From the parser's point of view, it can
 * treat a template as a simple stream of elements.
 * <p>
 * This class defines the token types and communicates these values to
 * {@code STParser.g} via {@code STLexer.tokens} file (which must remain
 * consistent).</p>
 */
public class STLexer implements TokenSource {
    /** Value held by {@link #c} once the input is exhausted. */
    public static final char EOF = (char) -1;            // EOF char
    /** Token type of the token returned at end of input. */
    public static final int EOF_TYPE = CharStream.EOF;  // EOF token type

    /** We build {@code STToken} tokens instead of relying on {@link CommonToken}
     *  so we can override {@link #toString()}. It just converts token types to
     *  token names like 23 to {@code "LDELIM"}.
     */
    public static class STToken extends CommonToken {
        /** Token whose text is the {@code start..stop} slice of {@code input}. */
        public STToken(CharStream input, int type, int start, int stop) {
            super(input, type, DEFAULT_CHANNEL, start, stop);
        }

        /** Token carrying explicit text rather than a slice of the input. */
        public STToken(int type, String text) {
            super(type, text);
        }

        /**
         * Renders the token as
         * {@code [@index,start:stop='text',<NAME>,line:pos]}; the channel is
         * included only when it is greater than zero, escapes in the text are
         * passed through {@link Misc#replaceEscapes}, and the type is shown by
         * name ({@code "EOF"} for {@link #EOF_TYPE}, otherwise looked up in
         * {@code STParser.tokenNames}).
         */
        @Override
        public String toString() {
            String channelStr = "";
            if ( channel>0 ) {
                channelStr = ",channel="+channel;
            }
            String txt = getText();
            if ( txt!=null ) txt = Misc.replaceEscapes(txt);
            else txt = "<no text>";
            String tokenName;
            if ( type==EOF_TYPE ) tokenName = "EOF";
            else tokenName = STParser.tokenNames[type];
            return "[@"+getTokenIndex()+","+start+":"+stop+"='"+txt+"',<"+tokenName+">"+channelStr+","+line+":"+getCharPositionInLine()+"]";
        }
    }

    /**
     * Sentinel returned by lexer rules that consumed input but produced no
     * token; {@link #_nextToken()} compares against it by identity and keeps
     * scanning.
     */
    public static final Token SKIP = new STToken(-1, "<skip>");

    // must follow STLexer.tokens file that STParser.g loads
    public static final int RBRACK = 17;
    public static final int LBRACK = 16;
    public static final int ELSE = 5;
    public static final int ELLIPSIS = 11;
    public static final int LCURLY = 20;
    public static final int BANG = 10;
    public static final int EQUALS = 12;
    public static final int TEXT = 22;
    public static final int ID = 25;
    public static final int SEMI = 9;
    public static final int LPAREN = 14;
    public static final int IF = 4;
    public static final int ELSEIF = 6;
    public static final int COLON = 13;
    public static final int RPAREN = 15;
    public static final int COMMA = 18;
    public static final int RCURLY = 21;
    public static final int ENDIF = 7;
    public static final int RDELIM = 24;
    public static final int SUPER = 8;
    public static final int DOT = 19;
    public static final int LDELIM = 23;
    public static final int STRING = 26;
    public static final int PIPE = 28;
    public static final int OR = 29;
    public static final int AND = 30;
    public static final int INDENT = 31;
    public static final int NEWLINE = 32;
    public static final int AT = 33;
    public static final int REGION_END = 34;
    public static final int TRUE = 35;
    public static final int FALSE = 36;
    public static final int COMMENT = 37;

    /** The char which delimits the start of an expression. */
    char delimiterStartChar = '<';

    /** The char which delimits the end of an expression. */
    char delimiterStopChar = '>';

    /**
     * This keeps track of the current mode of the lexer. Are we inside or
     * outside an ST expression?
     */
    boolean scanningInsideExpr = false;

    /** To be able to properly track the inside/outside mode, we need to
     *  track how deeply nested we are in subtemplates. Otherwise, we would
     *  not know whether a <code>'}'</code> seen outside of an expression is
     *  plain text or closes a subtemplate and so sends us back to inside
     *  mode.
     */
    public int subtemplateDepth = 0; // start out *not* in a {...} subtemplate

    /** Receiver of every lexical error this lexer reports. */
    ErrorManager errMgr;

    /** template embedded in a group file? this is the template */
    Token templateToken;

    /** The character stream being tokenized. */
    CharStream input;

    /** current character */
    char c;

    /** When we started token, track initial coordinates so we can properly
     *  build token objects.
     */
    int startCharIndex;

    int startLine;

    int startCharPositionInLine;

    /** Our lexer routines might have to emit more than a single token. We
     *  buffer everything through this list.
     */
    List<Token> tokens = new ArrayList<Token>();

    /**
     * Lexer over {@code input} using {@code STGroup.DEFAULT_ERR_MGR}, no
     * enclosing template token and the {@code <...>} delimiters.
     */
    public STLexer(CharStream input) {
        this(STGroup.DEFAULT_ERR_MGR, input, null, '<', '>');
    }

    /** Lexer with the {@code <...>} delimiters. */
    public STLexer(ErrorManager errMgr, CharStream input, Token templateToken) {
        this(errMgr, input, templateToken, '<', '>');
    }

    /**
     * @param errMgr             where lexical errors are reported
     * @param input              characters to tokenize
     * @param templateToken      passed along with every reported error
     * @param delimiterStartChar char that opens an expression
     * @param delimiterStopChar  char that closes an expression
     */
    public STLexer(ErrorManager errMgr, CharStream input, Token templateToken, char delimiterStartChar, char delimiterStopChar) {
        this.errMgr = errMgr;
        this.input = input;
        c = (char)input.LA(1); // prime lookahead
        this.templateToken = templateToken;
        this.delimiterStartChar = delimiterStartChar;
        this.delimiterStopChar = delimiterStopChar;
    }

    /**
     * Returns the next token: the oldest buffered token if any rule emitted
     * extra ones, otherwise a freshly scanned token.
     */
    @Override
    public Token nextToken() {
        if ( !tokens.isEmpty() ) {
            return tokens.remove(0);
        }
        return _nextToken();
    }

    /** Consume if {@code x} is next character on the input stream.
     *  A mismatch is reported through the error manager; one character is
     *  consumed either way.
     */
    public void match(char x) {
        if ( c!=x ) {
            NoViableAltException e = new NoViableAltException("", 0, 0, input);
            errMgr.lexerError(input.getSourceName(), "expecting '"+x+"', found '"+str(c)+"'", templateToken, e);
        }
        consume();
    }

    /** Advance the stream by one character and refresh the lookahead {@link #c}. */
    protected void consume() {
        input.consume();
        c = (char)input.LA(1);
    }

    /** Buffer {@code token} so a later {@link #nextToken()} call returns it. */
    public void emit(Token token) {
        tokens.add(token);
    }

    /**
     * Scans one token directly from the input, bypassing the token buffer.
     * Records the start coordinates, returns an {@link #EOF_TYPE} token at
     * end of input, and otherwise dispatches to {@link #inside()} or
     * {@link #outside()} until one of them yields something other than
     * {@link #SKIP}.
     */
    public Token _nextToken() {
        while ( true ) { // lets us avoid recursion when skipping stuff
            startCharIndex = input.index();
            startLine = input.getLine();
            startCharPositionInLine = input.getCharPositionInLine();
            if ( c==EOF ) return newToken(EOF_TYPE);
            Token t;
            if ( scanningInsideExpr ) t = inside();
            else t = outside();
            if ( t!=SKIP ) return t;
        }
    }

    /**
     * Scans one token while outside of an expression: leading indentation,
     * the start delimiter (which may introduce a comment or an escape),
     * a newline, a subtemplate-closing <code>'}'</code>, or plain text.
     */
    protected Token outside() {
        if ( input.getCharPositionInLine()==0 && (c==' ' || c=='\t') ) {
            while ( c==' ' || c=='\t' ) consume(); // scarf indent
            if ( c!=EOF ) return newToken(INDENT);
            return newToken(TEXT); // whitespace running up to EOF is ordinary text
        }

        if ( c==delimiterStartChar ) {
            consume();
            if ( c=='!' ) return COMMENT();
            if ( c=='\\' ) return ESCAPE(); // <\\> <\uFFFF> <\n> etc...
            scanningInsideExpr = true;
            return newToken(LDELIM);
        }

        if ( c=='\r' ) {
            consume();
            // \r\n is a single NEWLINE; a lone \r must not swallow the char after it
            if ( c=='\n' ) consume();
            return newToken(NEWLINE);
        }

        if ( c=='\n' ) {
            consume();
            return newToken(NEWLINE);
        }

        if ( c=='}' && subtemplateDepth>0 ) {
            // end of a {...} subtemplate: resume scanning the enclosing expression
            scanningInsideExpr = true;
            subtemplateDepth--;
            consume();
            return newTokenFromPreviousChar(RCURLY);
        }
        return mTEXT();
    }

    /**
     * Scans one token while inside of an expression. Whitespace yields
     * {@link #SKIP}; the stop delimiter switches back to outside mode;
     * identifiers that spell a keyword are returned with the keyword's token
     * type. Any other character is reported as invalid and dropped, except
     * that hitting EOF returns an {@link #EOF_TYPE} token.
     */
    protected Token inside() {
        while ( true ) {
            switch (c) {
                case ' ' :
                case '\t' :
                case '\n' :
                case '\r' :
                    consume();
                    return SKIP;
                case '.' :
                    consume();
                    if ( input.LA(1)=='.' && input.LA(2)=='.' ) {
                        consume();
                        match('.');
                        return newToken(ELLIPSIS);
                    }
                    return newToken(DOT);
                case ',' :
                    consume();
                    return newToken(COMMA);
                case ':' :
                    consume();
                    return newToken(COLON);
                case ';' :
                    consume();
                    return newToken(SEMI);
                case '(' :
                    consume();
                    return newToken(LPAREN);
                case ')' :
                    consume();
                    return newToken(RPAREN);
                case '[' :
                    consume();
                    return newToken(LBRACK);
                case ']' :
                    consume();
                    return newToken(RBRACK);
                case '=' :
                    consume();
                    return newToken(EQUALS);
                case '!' :
                    consume();
                    return newToken(BANG);
                case '@' :
                    consume();
                    if ( c=='e' && input.LA(2)=='n' && input.LA(3)=='d' ) {
                        consume();
                        consume();
                        consume();
                        return newToken(REGION_END);
                    }
                    return newToken(AT);
                case '"' :
                    return mSTRING();
                case '&' :
                    consume();
                    match('&');
                    return newToken(AND); // &&
                case '|' :
                    consume();
                    match('|');
                    return newToken(OR); // ||
                case '{' :
                    return subTemplate();
                default:
                    if ( c==delimiterStopChar ) {
                        consume();
                        scanningInsideExpr = false;
                        return newToken(RDELIM);
                    }

                    if ( isIDStartLetter(c) ) {
                        Token id = mID();
                        String name = id.getText();
                        // keywords share the ID lexical form; retag them here
                        if ( name.equals("if") ) return newToken(IF);
                        else if ( name.equals("endif") ) return newToken(ENDIF);
                        else if ( name.equals("else") ) return newToken(ELSE);
                        else if ( name.equals("elseif") ) return newToken(ELSEIF);
                        else if ( name.equals("super") ) return newToken(SUPER);
                        else if ( name.equals("true") ) return newToken(TRUE);
                        else if ( name.equals("false") ) return newToken(FALSE);
                        return id;
                    }
                    RecognitionException re = new NoViableAltException("", 0, 0, input);
                    re.line = startLine;
                    re.charPositionInLine = startCharPositionInLine;
                    errMgr.lexerError(input.getSourceName(), "invalid character '"+str(c)+"'", templateToken, re);
                    if ( c==EOF ) {
                        return newToken(EOF_TYPE);
                    }
                    consume(); // drop the offending char and try again
            }
        }
    }

    /**
     * Handles <code>'{'</code> inside an expression. Speculatively looks for
     * an argument list of the form {@code ID (',' ID)* '|'}; when found, the
     * argument, comma and pipe tokens are buffered via {@link #emit} and at
     * most one whitespace char after the pipe is dropped. Otherwise the input
     * is rewound to just after the curly. Either way the lexer switches to
     * outside mode, {@link #subtemplateDepth} is incremented and the
     * {@link #LCURLY} token is returned.
     */
    Token subTemplate() {
        // look for "{ args ID (',' ID)* '|' ..."
        subtemplateDepth++;
        int m = input.mark();
        // mID() overwrites the start coordinates while we speculate; save them
        int curlyStartChar = startCharIndex;
        int curlyLine = startLine;
        int curlyPos = startCharPositionInLine;
        List<Token> argTokens = new ArrayList<Token>();
        consume();
        Token curly = newTokenFromPreviousChar(LCURLY);
        WS();
        argTokens.add(mID());
        WS();
        while ( c==',' ) {
            consume();
            argTokens.add(newTokenFromPreviousChar(COMMA));
            WS();
            argTokens.add(mID());
            WS();
        }
        WS();
        if ( c=='|' ) {
            consume();
            argTokens.add(newTokenFromPreviousChar(PIPE));
            if ( isWS(c) ) consume(); // ignore a single whitespace after |
            for (Token t : argTokens) emit(t);
            input.release(m);
            scanningInsideExpr = false;
            startCharIndex = curlyStartChar; // reset state
            startLine = curlyLine;
            startCharPositionInLine = curlyPos;
            return curly;
        }
        // no argument list after all: back up and re-consume just the '{'
        input.rewind(m);
        startCharIndex = curlyStartChar; // reset state
        startLine = curlyLine;
        startCharPositionInLine = curlyPos;
        consume();
        scanningInsideExpr = false;
        return curly;
    }

    /**
     * Scans an escape; called with {@link #c} on the backslash that follows
     * the start delimiter. A {@code 'u'} is delegated to {@link #UNICODE()},
     * a second backslash to {@link #LINEBREAK()} (yielding {@link #SKIP}),
     * and {@code 'n'}, {@code 't'} and {@code ' '} become a {@link #TEXT}
     * token holding newline, tab and space respectively. Any other char is
     * reported as an error and skipped together with the stop delimiter.
     */
    Token ESCAPE() {
        startCharIndex = input.index();
        startCharPositionInLine = input.getCharPositionInLine();
        consume(); // kill \\
        if ( c=='u' ) return UNICODE();
        String text;
        switch (c) {
            case '\\' :
                LINEBREAK();
                return SKIP;
            case 'n' :
                text = "\n";
                break;
            case 't' :
                text = "\t";
                break;
            case ' ' :
                text = " ";
                break;
            default:
                NoViableAltException e = new NoViableAltException("", 0, 0, input);
                errMgr.lexerError(input.getSourceName(), "invalid escaped char: '"+str(c)+"'", templateToken, e);
                consume();
                match(delimiterStopChar);
                return SKIP;
        }
        consume();
        Token t = newToken(TEXT, text, input.getCharPositionInLine() -2);
        match(delimiterStopChar);
        return t;
    }

    /**
     * Scans the four hex digits of a unicode escape; called with {@link #c}
     * on the {@code 'u'}. Returns a {@link #TEXT} token holding the denoted
     * character. If any of the four chars is not a hex digit, an error is
     * reported for each offender and {@link #SKIP} is returned instead of
     * attempting to parse the digits (which would throw
     * {@link NumberFormatException}). In both cases the same amount of input
     * is consumed, including the stop delimiter.
     */
    Token UNICODE() {
        consume(); // kill 'u'
        char[] chars = new char[4];
        boolean valid = true;
        for (int i = 0; i<chars.length; i++) {
            if ( i>0 ) consume();
            if ( !isUnicodeLetter(c) ) {
                valid = false;
                NoViableAltException e = new NoViableAltException("", 0, 0, input);
                errMgr.lexerError(input.getSourceName(), "invalid unicode char: '"+str(c)+"'", templateToken, e);
            }
            chars[i] = c;
        }
        Token t = SKIP;
        if ( valid ) {
            char uc = (char)Integer.parseInt(new String(chars), 16);
            // built before the last digit is consumed, hence the -6 column offset
            t = newToken(TEXT, String.valueOf(uc), input.getCharPositionInLine() -6);
        }
        consume(); // last hex digit
        match(delimiterStopChar);
        return t;
    }

    /**
     * Scans plain template text up to (not including) EOF, the start
     * delimiter, a newline char, or a <code>'}'</code> that closes a
     * subtemplate. A doubled backslash becomes a single backslash, and a
     * backslash before the start delimiter or before <code>'}'</code> is
     * dropped so the following char is taken literally; any other backslash
     * is kept. The token carries explicit text only when such a rewrite
     * happened; otherwise its text is the raw input slice.
     */
    Token mTEXT() {
        boolean modifiedText = false;
        StringBuilder buf = new StringBuilder();
        while ( c!=EOF && c!=delimiterStartChar ) {
            if ( c=='\r' || c=='\n' ) break;
            if ( c=='}' && subtemplateDepth>0 ) break;
            if ( c=='\\' ) {
                if ( input.LA(2)=='\\' ) { // convert \\ to \
                    consume();
                    consume();
                    buf.append('\\');
                    modifiedText = true;
                    continue;
                }

                if ( input.LA(2)==delimiterStartChar || input.LA(2)=='}' ) {
                    modifiedText = true;
                    consume(); // toss out \ char
                    buf.append(c);
                    consume();
                }
                else {
                    buf.append(c);
                    consume();
                }
                continue;
            }
            buf.append(c);
            consume();
        }

        if ( modifiedText ) return newToken(TEXT, buf.toString());
        else return newToken(TEXT);
    }

    /** <pre>
     *  ID  : ('a'..'z'|'A'..'Z'|'_'|'/')
     *        ('a'..'z'|'A'..'Z'|'0'..'9'|'_'|'/')*
     *      ;
     *  </pre>
     *  The first character is consumed unconditionally; callers decide
     *  whether it may start an identifier.
     */
    Token mID() {
        // called from subTemplate; so keep resetting position during speculation
        startCharIndex = input.index();
        startLine = input.getLine();
        startCharPositionInLine = input.getCharPositionInLine();
        consume();
        while ( isIDLetter(c) ) {
            consume();
        }
        return newToken(ID);
    }

    /** <pre>
     *  STRING : '"'
     *           (   '\\' '"'
     *           |   '\\' ~'"'
     *           |   ~('\\'|'"')
     *           )*
     *           '"'
     *         ;
     * </pre>
     *  Backslash-n, -r and -t are translated to newline, carriage return and
     *  tab; a backslash before any other char yields that char. The token
     *  text keeps the surrounding quotes. Hitting EOF before the closing
     *  quote is reported once as "EOF in string" and ends the token; the EOF
     *  sentinel is never added to the token text.
     */
    Token mSTRING() {
        boolean sawEscape = false;
        StringBuilder buf = new StringBuilder();
        buf.append(c); // opening quote
        consume();
        while ( c!='"' ) {
            if ( c==EOF ) {
                RecognitionException re = new MismatchedTokenException((int)'"', input);
                re.line = input.getLine();
                re.charPositionInLine = input.getCharPositionInLine();
                errMgr.lexerError(input.getSourceName(), "EOF in string", templateToken, re);
                break;
            }
            if ( c=='\\' ) {
                sawEscape = true;
                consume();
                if ( c==EOF ) continue; // dangling backslash; EOF is reported at the loop head
                switch (c) {
                    case 'n' :
                        buf.append('\n');
                        break;
                    case 'r' :
                        buf.append('\r');
                        break;
                    case 't' :
                        buf.append('\t');
                        break;
                    default:
                        buf.append(c);
                        break;
                }
                consume();
                continue;
            }
            buf.append(c);
            consume();
        }
        if ( c=='"' ) { // closing quote; absent when the string ran into EOF
            buf.append(c);
            consume();
        }
        if ( sawEscape ) return newToken(STRING, buf.toString());
        else return newToken(STRING);
    }

    /** Skip any run of space, tab, newline and carriage-return chars. */
    void WS() {
        while ( c==' ' || c=='\t' || c=='\n' || c=='\r' ) consume();
    }

    /**
     * Scans a comment; called with {@link #c} on the {@code '!'} that follows
     * the start delimiter. Consumes through the closing {@code '!'} plus stop
     * delimiter and returns a {@link #COMMENT} token. Reaching EOF first is
     * reported as a nonterminated comment.
     */
    Token COMMENT() {
        match('!');
        while ( !(c=='!' && input.LA(2)==delimiterStopChar) ) {
            if ( c==EOF ) {
                RecognitionException re = new MismatchedTokenException((int)'!', input);
                re.line = input.getLine();
                re.charPositionInLine = input.getCharPositionInLine();
                errMgr.lexerError(input.getSourceName(), "Nonterminated comment starting at "+startLine+":"+startCharPositionInLine+": '!"+delimiterStopChar+"' missing", templateToken, re);
                break;
            }
            consume();
        }
        consume();
        consume(); // grab !>
        return newToken(COMMENT);
    }

    /**
     * Finishes a newline escape: matches the second backslash and the stop
     * delimiter, then swallows trailing spaces/tabs, the line terminator
     * ({@code \n} or {@code \r\n}) and the next line's leading spaces/tabs.
     * EOF in place of the line terminator is reported as an error.
     */
    void LINEBREAK() {
        match('\\'); // only kill 2nd \ as ESCAPE() kills first one
        match(delimiterStopChar);
        while ( c==' ' || c=='\t' ) consume(); // scarf WS after <\\>
        if ( c==EOF ) {
            RecognitionException re = new RecognitionException(input);
            re.line = input.getLine();
            re.charPositionInLine = input.getCharPositionInLine();
            errMgr.lexerError(input.getSourceName(), "Missing newline after newline escape <\\\\>", templateToken, re);
            return;
        }

        if ( c=='\r' ) consume();
        match('\n');
        while ( c==' ' || c=='\t' ) consume(); // scarf any indent
    }

    /**
     * Whether {@code c} may start an identifier. Currently identical to
     * {@link #isIDLetter(char)}, so digits are accepted here too even though
     * the ID rule documented on {@code mID()} does not list them as a first
     * char.
     */
    public static boolean isIDStartLetter(char c) {
        return isIDLetter(c);
    }

    /** Whether {@code c} is an ASCII letter, an ASCII digit, {@code '_'} or {@code '/'}. */
    public static boolean isIDLetter(char c) {
        return c >='a' && c <='z' || c >='A' && c <='Z' || c >='0' && c <='9' || c=='_' || c=='/';
    }

    /** Whether {@code c} is a space, tab, newline or carriage return. */
    public static boolean isWS(char c) {
        return c==' ' || c=='\t' || c=='\n' || c=='\r';
    }

    /** Whether {@code c} is a hexadecimal digit (either case), as used in unicode escapes. */
    public static boolean isUnicodeLetter(char c) {
        return c >='a' && c <='f' || c >='A' && c <='F' || c >='0' && c <='9';
    }

    /**
     * Token of type {@code ttype} spanning from the recorded start index to
     * the last consumed char, positioned at the recorded start line/column.
     */
    public Token newToken(int ttype) {
        STToken t = new STToken(input, ttype, startCharIndex, input.index() -1);
        t.setLine(startLine);
        t.setCharPositionInLine(startCharPositionInLine);
        return t;
    }

    /**
     * One-char token of type {@code ttype} covering the char just consumed,
     * positioned from the stream's current line and column minus one.
     */
    public Token newTokenFromPreviousChar(int ttype) {
        STToken t = new STToken(input, ttype, input.index() -1, input.index() -1);
        t.setLine(input.getLine());
        t.setCharPositionInLine(input.getCharPositionInLine() -1);
        return t;
    }

    /**
     * Token with explicit {@code text}, spanning from the recorded start
     * index to the last consumed char, on the stream's current line at
     * column {@code pos}.
     */
    public Token newToken(int ttype, String text, int pos) {
        STToken t = new STToken(ttype, text);
        t.setStartIndex(startCharIndex);
        t.setStopIndex(input.index() -1);
        t.setLine(input.getLine());
        t.setCharPositionInLine(pos);
        return t;
    }

    /**
     * Token with explicit {@code text}, spanning from the recorded start
     * index to the last consumed char, positioned at the recorded start
     * line/column.
     */
    public Token newToken(int ttype, String text) {
        STToken t = new STToken(ttype, text);
        t.setStartIndex(startCharIndex);
        t.setStopIndex(input.index() -1);
        t.setLine(startLine);
        t.setCharPositionInLine(startCharPositionInLine);
        return t;
    }

    /** Always returns the placeholder {@code "no idea"}. */
    @Override
    public String getSourceName() {
        return "no idea";
    }

    /** Printable form of {@code c} for error messages: {@code "<EOF>"} for {@link #EOF}, else the char itself. */
    public static String str(int c) {
        if ( c==EOF ) return "<EOF>";
        return String.valueOf((char)c);
    }
}