package com.zongtui.webcrawler.sourceer.xsoup.xevaluator;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.helper.Validate;
import org.jsoup.select.Evaluator;
import org.jsoup.select.Selector;

/**
 * Parser of XPath.
 * <p>
 * Reads an XPath-like expression token by token (through {@link XTokenQueue}) and
 * builds an {@link XPathEvaluator} from it. Path steps separated by {@code /} or
 * {@code //} are parsed recursively as sub queries and chained with structural
 * evaluators; {@code |} splits the expression into two independently parsed queries.
 * <p>
 * A parser instance keeps mutable parsing state and is meant to be used for a single
 * {@link #parse()} call; use {@link #parse(String)} for one-shot parsing.
 *
 * @author [email protected]
 */
public class XPathParser {

    /** Tokens that separate path steps; they also terminate a sub query. */
    private static final String[] COMBINATORS = new String[]{"//", "/", "|"};

    private static final String[] ESCAPED_QUOTES = new String[]{"\\\"", "\\'"};

    private static final String[] QUOTES = new String[]{"\"", "'"};

    private static final String OR_COMBINATOR = "|";

    /** Matches {@code text()} and {@code text(n)}; group 1 is the (possibly empty) number. */
    private static final Pattern TEXT_FUNCTION_PATTERN = Pattern.compile("text\\((\\d*)\\)");

    /** Predicate functions usable inside {@code [...]}, keyed by function name. */
    private static final Map<String, FunctionEvaluator> FUNCTION_MAPPING = new HashMap<String, FunctionEvaluator>();
    static {
        FUNCTION_MAPPING.put("contains", new FunctionEvaluator() {
            @Override
            public Evaluator call(String... param) {
                Validate.isTrue(param.length == 2, String.format("Error argument of %s", "contains"));
                return new Evaluator.AttributeWithValueContaining(param[0], param[1]);
            }
        });
        FUNCTION_MAPPING.put("starts-with", new FunctionEvaluator() {
            @Override
            public Evaluator call(String... param) {
                Validate.isTrue(param.length == 2, String.format("Error argument of %s", "starts-with"));
                return new Evaluator.AttributeWithValueStarting(param[0], param[1]);
            }
        });
        FUNCTION_MAPPING.put("ends-with", new FunctionEvaluator() {
            @Override
            public Evaluator call(String... param) {
                Validate.isTrue(param.length == 2, String.format("Error argument of %s", "ends-with"));
                return new Evaluator.AttributeWithValueEnding(param[0], param[1]);
            }
        });
    }

    private final XTokenQueue tq;
    private final String query;
    /** Evaluators collected for the current path step; combined with AND when more than one. */
    private final List<Evaluator> evals = new ArrayList<Evaluator>();
    /** Operator set by an attribute ({@code @x}) or a function step such as {@code text()}. */
    private ElementOperator elementOperator;
    /** Set once an attribute or function step was consumed; no further tokens are accepted. */
    private boolean noEvalAllow = false;

    /**
     * Creates a parser for the given expression.
     *
     * @param xpathStr the XPath expression to parse
     */
    public XPathParser(String xpathStr) {
        this.query = xpathStr;
        this.tq = new XTokenQueue(xpathStr);
    }

    /**
     * Parses the whole expression this parser was created with.
     *
     * @return the evaluator for the expression
     * @throws IllegalArgumentException if a token follows an attribute or function step,
     *                                  or the expression is otherwise malformed
     */
    public XPathEvaluator parse() {

        while (!tq.isEmpty()) {
            Validate.isFalse(noEvalAllow, "XPath error! No operator allowed after attribute or function!" + tq);
            if (tq.matchChomp(OR_COMBINATOR)) {
                // "a | b": everything parsed so far is the left side, the rest is the right side.
                tq.consumeWhitespace();
                return combineXPathEvaluator(tq.remainder());
            } else if (tq.matchesAny(COMBINATORS)) {
                combinator(tq.consumeAny(COMBINATORS));
            } else {
                findElements();
            }
            tq.consumeWhitespace();
        }
        return collectXPathEvaluator();
    }

    /**
     * Combines what has been parsed so far with the separately parsed {@code subQuery}.
     */
    private XPathEvaluator combineXPathEvaluator(String subQuery) {
        XPathEvaluator xPathEvaluator = collectXPathEvaluator();
        return new CombingXPathEvaluator(xPathEvaluator, parse(subQuery));
    }

    /**
     * Wraps the collected evaluators and the element operator into a single evaluator.
     */
    private XPathEvaluator collectXPathEvaluator() {
        if (noEvalAllow) {
            // an attribute/function step contributes only an operator, no evaluator
            return new DefaultXPathEvaluator(null, elementOperator);
        }

        if (evals.size() == 1) {
            return new DefaultXPathEvaluator(evals.get(0), elementOperator);
        }

        return new DefaultXPathEvaluator(new CombiningEvaluator.And(evals), elementOperator);
    }

    /**
     * Handles a {@code /} or {@code //} combinator: parses the step that follows it as a sub
     * query and links it to everything collected so far.
     *
     * @param combinator the combinator token that was just consumed
     */
    private void combinator(String combinator) {
        Evaluator currentEval;
        if (evals.size() == 0) {
            currentEval = new StructuralEvaluator.Root();
        } else if (evals.size() == 1) {
            currentEval = evals.get(0);
        } else {
            currentEval = new CombiningEvaluator.And(evals);
        }
        evals.clear();
        String subQuery = consumeSubQuery();
        XPathEvaluator tmpEval = parse(subQuery);
        if (!(tmpEval instanceof DefaultXPathEvaluator)) {
            throw new IllegalArgumentException(String.format("Error XPath in %s", subQuery));
        }
        DefaultXPathEvaluator newEval = (DefaultXPathEvaluator) tmpEval;
        if (newEval.getElementOperator() != null) {
            elementOperator = newEval.getElementOperator();
        }
        // attribute expr does not return Evaluator
        if (newEval.getEvaluator() != null) {
            if (combinator.equals("//")) {
                currentEval = new CombiningEvaluator.And(newEval.getEvaluator(), new StructuralEvaluator.Parent(currentEval));
            } else if (combinator.equals("/")) {
                currentEval = new CombiningEvaluator.And(newEval.getEvaluator(), new StructuralEvaluator.ImmediateParent(currentEval));
            }
        }
        evals.add(currentEval);

    }

    /**
     * Consumes one path step from the queue, stopping at the next combinator that is not
     * inside brackets, parentheses or quotes. Whitespace outside of those is dropped.
     *
     * @return the text of the consumed step
     */
    private String consumeSubQuery() {
        StringBuilder sq = new StringBuilder();
        while (!tq.isEmpty()) {
            tq.consumeWhitespace();
            if (tq.matches("(")) {
                sq.append("(").append(tq.chompBalanced('(', ')')).append(")");
            } else if (tq.matches("[")) {
                sq.append("[").append(tq.chompBalanced('[', ']')).append("]");
            } else if (tq.matchesAny(ESCAPED_QUOTES)) {
                sq.append(tq.consumeAny(ESCAPED_QUOTES));
            } else if (tq.matchesAny(QUOTES)) {
                sq.append(tq.chompBalancedQuotes());
            } else if (tq.matchesAny(COMBINATORS)) {
                break;
            } else if (!tq.isEmpty()) {
                // the queue may have become empty after the whitespace was consumed above
                sq.append(tq.consume());
            }
        }
        return sq.toString();
    }

    /**
     * Dispatches on the next token of a single path step (attribute, wildcard, function,
     * tag name, positional index or predicate).
     */
    private void findElements() {
        if (tq.matches("@")) {
            consumeAttribute();
        } else if (tq.matches("*")) {
            allElements();
        } else if (tq.matchesRegex("\\w+\\(.*\\).*")) {
            consumeOperatorFunction();
        } else if (tq.matchesWord()) {
            byTag();
        } else if (tq.matchesRegex("\\[\\d+\\]")) {
            byNth();
        } else if (tq.matches("[")) {
            evals.add(consumePredicates(tq.chompBalanced('[', ']')));
        } else {
            // unhandled
            throw new Selector.SelectorParseException("Could not parse query '%s': unexpected token at '%s'", query, tq.remainder());
        }

    }

    /**
     * EvaluatorStack for logic calculate.
     * Priority: AND > OR, Regardless of bracket.
     * <p>
     * Calculate AND immediately.
     * Store evaluator with OR, until there are two evaluator in stack, then calculate it.
     */
    static class EvaluatorStack extends Stack<Evaluator> {

        /**
         * Adds {@code evaluator} to the stack, joined to the previous content by
         * {@code operation}. The first evaluator is pushed as is; any operation other than
         * {@link Operation#AND} is handled as OR.
         */
        public void calc(Evaluator evaluator, Operation operation) {
            if (size() == 0) {
                push(evaluator);
            } else {
                if (operation == Operation.AND) {
                    evaluator = new CombiningEvaluator.And(pop(), evaluator);
                } else {
                    mergeOr();
                }
                push(evaluator);
            }
        }

        /**
         * Replaces the two topmost evaluators by their OR combination; does nothing when
         * fewer than two evaluators are on the stack.
         */
        public void mergeOr() {
            if (size() >= 2) {
                Evaluator pop1 = pop();
                Evaluator pop2 = pop();
                // pop2 was pushed first, so it stays the left operand
                Evaluator tempEvaluator = new CombiningEvaluator.Or(pop2, pop1);
                push(tempEvaluator);
            }
        }
    }

    /** Factory turning the (already unquoted) arguments of a predicate function into an evaluator. */
    interface FunctionEvaluator {
        Evaluator call(String... param);
    }

    /** Logical operators allowed between predicates. */
    enum Operation {
        AND, OR;
    }

    /**
     * Parses the content of a predicate ({@code [...]}) or of a parenthesised group inside one.
     *
     * @param queue the predicate text without the surrounding brackets
     * @return the evaluator representing the whole predicate
     * @throws Selector.SelectorParseException if the predicate is empty or contains an unexpected token
     * @throws IllegalArgumentException        if two predicates follow each other without and/or
     */
    private Evaluator consumePredicates(String queue) {
        XTokenQueue predicatesQueue = new XTokenQueue(queue);
        EvaluatorStack evaluatorStack = new EvaluatorStack();
        Operation currentOperation = null;
        predicatesQueue.consumeWhitespace();
        while (!predicatesQueue.isEmpty()) {
            if (predicatesQueue.matchChomp("and")) {
                currentOperation = Operation.AND;
            } else if (predicatesQueue.matchChomp("or")) {
                currentOperation = Operation.OR;
            } else {
                if (currentOperation == null && evaluatorStack.size() > 0) {
                    throw new IllegalArgumentException(String.format("Need AND/OR between two predicate! %s", predicatesQueue.remainder()));
                }
                Evaluator evaluator;
                if (predicatesQueue.matches("(")) {
                    evaluator = consumePredicates(predicatesQueue.chompBalanced('(', ')'));
                } else if (predicatesQueue.matches("@")) {
                    evaluator = byAttribute(predicatesQueue);
                } else if (predicatesQueue.matchesRegex("\\w+.*")) {
                    evaluator = byFunction(predicatesQueue);
                } else {
                    throw new Selector.SelectorParseException("Could not parse query '%s': unexpected token at '%s'", query, predicatesQueue.remainder());
                }
                evaluatorStack.calc(evaluator, currentOperation);
                //consume operator
                currentOperation = null;
            }
            predicatesQueue.consumeWhitespace();
        }
        if (evaluatorStack.size() == 0) {
            // "[]" or "()": report a parse error instead of an EmptyStackException from peek()
            throw new Selector.SelectorParseException("Could not parse query '%s': empty predicate '%s'", query, queue);
        }
        evaluatorStack.mergeOr();
        return evaluatorStack.peek();
    }

    /**
     * Parses a predicate function call such as {@code contains(@class, 'x')}.
     *
     * @param predicatesQueue queue positioned at the function name
     * @return the evaluator produced by the matching entry of {@link #FUNCTION_MAPPING}
     * @throws Selector.SelectorParseException if the function is unknown or its first
     *                                         argument is not an attribute reference
     */
    private Evaluator byFunction(XTokenQueue predicatesQueue) {
        for (Map.Entry<String, FunctionEvaluator> entry : FUNCTION_MAPPING.entrySet()) {
            if (predicatesQueue.matchChomp(entry.getKey())) {
                String paramString = predicatesQueue.chompBalanced('(', ')');
                List<String> params = XTokenQueue.trimQuotes(XTokenQueue.parseFuncionParams(paramString));

                // Only attribute based calls are supported. Fail here rather than handing a
                // null evaluator to the caller, which would only blow up during evaluation.
                if (params.isEmpty() || !params.get(0).startsWith("@")) {
                    throw new Selector.SelectorParseException(
                            "Could not parse query '%s': first argument of %s(%s) must be an attribute (@name)",
                            query, entry.getKey(), paramString);
                }
                params.set(0, params.get(0).substring(1));
                return entry.getValue().call(params.toArray(new String[0]));
            }
        }

        throw new Selector.SelectorParseException("Could not parse query '%s': unexpected token at '%s'", query, predicatesQueue.remainder());
    }

    /** Consumes {@code *} and adds an evaluator matching all elements. */
    private void allElements() {
        tq.consume();
        evals.add(new Evaluator.AllElements());
    }

    /** Consumes a positional predicate {@code [n]} and adds the matching nth-of-type evaluator. */
    private void byNth() {
        String nth = tq.chompBalanced('[', ']');
        evals.add(new XEvaluators.IsNthOfType(0, Integer.parseInt(nth)));
    }

    /**
     * Consumes an attribute step ({@code @name}); the whole rest of the queue is taken as
     * the attribute name.
     */
    private void consumeAttribute() {
        tq.consume("@");
        elementOperator = new ElementOperator.AttributeGetter(tq.remainder());
        noEvalAllow = true;
    }

    /**
     * Consumes a function step ({@code text()}, {@code regex(...)}, {@code allText()},
     * {@code tidyText()}, {@code html()}, {@code outerHtml()}) and sets the element operator.
     *
     * @throws IllegalArgumentException if the function is not supported
     */
    private void consumeOperatorFunction() {
        String remainder = consumeSubQuery();
        if (remainder.startsWith("text(")) {
            functionText(remainder);
        } else if (remainder.startsWith("regex(")) {
            functionRegex(remainder);
        } else if (remainder.equals("allText()")) {
            elementOperator = new ElementOperator.AllText();
        } else if (remainder.equals("tidyText()")) {
            elementOperator = new ElementOperator.TidyText();
        } else if (remainder.equals("html()")) {
            elementOperator = new ElementOperator.Html();
        } else if (remainder.equals("outerHtml()")) {
            elementOperator = new ElementOperator.OuterHtml();
        } else {
            throw new IllegalArgumentException("Unsupported function " + remainder);
        }
        // every branch above either assigned an operator or threw
        noEvalAllow = true;
    }

    /**
     * Parses {@code regex(pattern)}, {@code regex(@attr, pattern)}, {@code regex(pattern, group)}
     * or {@code regex(@attr, pattern, group)}.
     *
     * @param remainder the complete function call text, starting with {@code regex(}
     */
    private void functionRegex(String remainder) {
        Validate.isTrue(remainder.endsWith(")"), "Unclosed bracket for function! " + remainder);
        List<String> params = XTokenQueue.trimQuotes(XTokenQueue.parseFuncionParams(remainder.substring("regex(".length(), remainder.length() - 1)));
        if (params.size() == 1) {
            elementOperator = new ElementOperator.Regex(params.get(0));
        } else if (params.size() == 2) {
            if (params.get(0).startsWith("@")) {
                elementOperator = new ElementOperator.Regex(params.get(1), params.get(0).substring(1));
            } else {
                elementOperator = new ElementOperator.Regex(params.get(0), null, Integer.parseInt(params.get(1)));
            }
        } else if (params.size() == 3) {
            // strip the leading '@' only when present, so "href" is not mangled into "ref"
            String attribute = params.get(0);
            if (attribute.startsWith("@")) {
                attribute = attribute.substring(1);
            }
            elementOperator = new ElementOperator.Regex(params.get(1), attribute, Integer.parseInt(params.get(2)));
        } else {
            // pass the user text as an argument: it must not be interpreted as a format string
            throw new Selector.SelectorParseException("Unknown usage for regex()%s", remainder);
        }
    }

    /**
     * Parses {@code text()} or {@code text(n)}; an empty argument is treated as group 0.
     *
     * @param remainder the complete function call text, starting with {@code text(}
     * @throws IllegalArgumentException if the argument is not empty or a number
     */
    private void functionText(String remainder) {
        Matcher matcher = TEXT_FUNCTION_PATTERN.matcher(remainder);
        if (!matcher.matches()) {
            // e.g. "text(abc)": reject instead of silently ignoring the function
            throw new IllegalArgumentException("Unsupported function " + remainder);
        }
        String group = matcher.group(1);
        int attributeGroup = group.isEmpty() ? 0 : Integer.parseInt(group);
        elementOperator = new ElementOperator.GroupedText(attributeGroup);
    }

    /** Consumes a tag name and adds the matching tag evaluator. */
    private void byTag() {
        String tagName = tq.consumeElementSelector();
        Validate.notEmpty(tagName);

        // namespaces: if element name is "abc:def", selector must be "abc|def", so flip:
        if (tagName.contains("|")) {
            tagName = tagName.replace("|", ":");
        }

        evals.add(new Evaluator.Tag(tagName.trim().toLowerCase()));
    }

    /**
     * Parses an attribute predicate such as {@code @id}, {@code @*}, {@code @id='x'} or
     * {@code @href^='http'}.
     *
     * @param cq queue positioned at the leading {@code @}
     * @return the evaluator for the attribute test
     */
    private Evaluator byAttribute(XTokenQueue cq) {
        cq.matchChomp("@");
        // eq, not, start, end, contain, match, (no val); trimmed so "@class = 'a'" yields "class"
        String key = cq.consumeToAny("=", "!=", "^=", "$=", "*=", "~=").trim();
        Validate.notEmpty(key);
        cq.consumeWhitespace();
        Evaluator evaluator;
        if (cq.isEmpty()) {
            if ("*".equals(key)) {
                evaluator = new XEvaluators.HasAnyAttribute();
            } else {
                evaluator = new Evaluator.Attribute(key);
            }
        } else {
            if (cq.matchChomp("=")) {
                String value = chompEqualValue(cq);
                //to support select one class out of all
                if (key.equals("class")) {
                    String className = XTokenQueue.trimQuotes(value);
                    if (!className.contains(" ")) {
                        evaluator = new Evaluator.Class(className);
                    } else {
                        evaluator = new Evaluator.AttributeWithValue(key, className);
                    }
                } else {
                    evaluator = new Evaluator.AttributeWithValue(key, XTokenQueue.trimQuotes(value));
                }
            } else if (cq.matchChomp("!=")) {
                evaluator = new Evaluator.AttributeWithValueNot(key, XTokenQueue.trimQuotes(chompEqualValue(cq)));
            } else if (cq.matchChomp("^=")) {
                evaluator = new Evaluator.AttributeWithValueStarting(key, XTokenQueue.trimQuotes(chompEqualValue(cq)));
            } else if (cq.matchChomp("$=")) {
                evaluator = new Evaluator.AttributeWithValueEnding(key, XTokenQueue.trimQuotes(chompEqualValue(cq)));
            } else if (cq.matchChomp("*=")) {
                evaluator = new Evaluator.AttributeWithValueContaining(key, XTokenQueue.trimQuotes(chompEqualValue(cq)));
            } else if (cq.matchChomp("~=")) {
                evaluator = new Evaluator.AttributeWithValueMatching(key, Pattern.compile(XTokenQueue.trimQuotes(chompEqualValue(cq))));
            } else {
                throw new Selector.SelectorParseException("Could not parse attribute query '%s': unexpected token at '%s'", query, chompEqualValue(cq));
            }
        }
        return evaluator;
    }

    /**
     * Consumes the value on the right-hand side of an attribute operator: a single- or
     * double-quoted string, otherwise the text up to the next space, otherwise the rest.
     */
    private String chompEqualValue(XTokenQueue cq) {
        String value;
        if (cq.matchChomp("'")) {
            value = cq.chompTo("'");
        } else if (cq.matchChomp("\"")) {
            value = cq.chompTo("\"");
        } else if (cq.containsAny(" ")) {
            value = cq.chompTo(" ");
        } else {
            value = cq.remainder();
        }
        return value;
    }

    /**
     * Parses {@code xpathStr} with a fresh parser instance.
     *
     * @param xpathStr the XPath expression to parse
     * @return the evaluator for the expression
     */
    public static XPathEvaluator parse(String xpathStr) {
        XPathParser xPathParser = new XPathParser(xpathStr);
        return xPathParser.parse();
    }

}