/*
 *    Copyright (c) Sematext International
 *    All Rights Reserved
 *
 *    THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF Sematext International
 *    The copyright notice above does not evidence any
 *    actual or intended publication of such source code.
 */
package com.sematext.solr.handler.component.relaxer.query;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.DisMaxQParser;
import org.apache.solr.search.SyntaxError;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * {@link QueryConverter} that converts query to a list of {@link Clause}s.
 *
 * <p>The query string is split on whitespace into clauses. Each clause may carry a leading
 * {@code +} / {@code -} operator, an optional {@code field:} prefix and either a bare term or a
 * double-quoted phrase. When an analyzer is configured for the clause's field (or for one of the
 * default query fields), the clause text is additionally run through that analyzer and the
 * resulting terms are stored on the clause.
 *
 * <p>NOTE(review): the splitting logic closely resembles the clause splitter of Solr's extended
 * dismax parser - confirm before diverging from it.
 *
 * @author sematext, http://www.sematext.com/
 */
public class EdismaxQueryConverter extends QueryConverter {
  private static final Logger LOG = LoggerFactory.getLogger(EdismaxQueryConverter.class);

  /**
   * Converts the query into clauses, using the query fields parsed from the request as the
   * default fields for analyzer lookup.
   *
   * @param query the raw user query
   * @param req the current request; its schema and params are used to resolve the query fields
   * @return the clauses found in {@code query}, in order of appearance
   * @throws RuntimeException wrapping the {@link SyntaxError} if the query fields cannot be parsed
   */
  @Override
  public List<Clause> convert(String query, SolrQueryRequest req) {
    try {
      Map<String, Float> queryFields = DisMaxQParser.parseQueryFields(req.getSchema(), req.getParams());
      return splitIntoClauses(query, queryFields.keySet(), false);
    } catch (SyntaxError e) {
      // keep the original error as the cause so the failure can be diagnosed
      throw new RuntimeException("Failed to parse query fields for query: " + query, e);
    }
  }

  /**
   * Splits {@code s} into clauses.
   *
   * @param s the query string to split
   * @param defaultFields fields consulted for an analyzer when a clause has no explicit field
   * @param ignoreQuote when {@code true}, a double quote is treated like any other special
   *        character instead of starting a phrase; used for the retry after unbalanced quotes
   * @return the parsed clauses
   */
  private List<Clause> splitIntoClauses(String s, Set<String> defaultFields, boolean ignoreQuote) {
    List<Clause> lst = new ArrayList<Clause>(4);

    int pos = 0;
    int end = s.length();
    while (pos < end) {
      Clause clause = new Clause();
      boolean disallowUserField = true;

      // skip leading whitespace
      char ch = s.charAt(pos);
      while (Character.isWhitespace(ch)) {
        if (++pos >= end) {
          break;
        }
        ch = s.charAt(pos);
      }

      int start = pos;

      if (ch == '+' || ch == '-') {
        clause.must = ch;
        pos++;
      }

      clause.field = getFieldName(s, pos, end);
      if (clause.field != null) {
        disallowUserField = false;
        int colon = s.indexOf(':', pos);
        clause.rawField = s.substring(pos, colon);
        pos = colon + 1; // skip the field name and the ':'
      }

      if (pos >= end) {
        break;
      }

      // non-zero while we are inside a quoted phrase; holds the closing quote char
      char inString = 0;

      ch = s.charAt(pos);
      if (!ignoreQuote && ch == '"') {
        clause.isPhrase = true;
        inString = '"';
        pos++;
      }

      StringBuilder sb = new StringBuilder();
      while (pos < end) {
        ch = s.charAt(pos++);
        if (ch == '\\') { // skip escaped chars, but leave escaped
          sb.append(ch);
          if (pos >= end) {
            sb.append(ch); // double backslash if we are at the end of the string
            break;
          }
          ch = s.charAt(pos++);
          sb.append(ch);
          continue;
        } else if (inString != 0 && ch == inString) {
          // closing quote: the phrase is complete
          inString = 0;
          break;
        } else if (Character.isWhitespace(ch)) {
          clause.hasWhitespace = true;
          if (inString == 0) {
            // end of the token if we aren't in a string, backing
            // up the position.
            pos--;
            break;
          }
        }

        if (inString == 0) {
          if (isSpecialChar(ch)) {
            clause.hasSpecialSyntax = true;
            sb.append('\\');
          }
        } else if (ch == '"') {
          // only char we need to escape in a string is double quote
          sb.append('\\');
        }
        sb.append(ch);
      }

      // text as parsed; kept separately because clause.val may be rewritten below
      String text = sb.toString();
      clause.val = text;

      if (clause.isPhrase) {
        if (inString != 0) {
          // detected bad quote balancing... retry
          // parsing with quotes like any other char
          return splitIntoClauses(s, defaultFields, true);
        }

        // special syntax in a string isn't special
        clause.hasSpecialSyntax = false;
      } else if (text.length() == 0) {
        // an empty clause... must be just a + or - on its own
        clause.syntaxError = true;
        if (clause.must != 0) {
          clause.val = "\\" + clause.must;
          clause.must = 0;
          clause.hasSpecialSyntax = true;
        } else {
          // uh.. this shouldn't happen.
          clause = null;
        }
      }

      if (clause != null) {
        // analyze only clauses that are kept, i.e. after the quote-balance retry check,
        // so that discarded clauses do not trigger analysis (or analysis warnings)
        analyzeClause(clause, text, defaultFields);

        clause.raw = s.substring(start, pos);
        // escape colons, except for "match all" query
        if (disallowUserField && !"*:*".equals(clause.raw)) {
          clause.raw = clause.raw.replace(":", "\\:");
        }
        lst.add(clause);
      }
    }

    return lst;
  }

  /**
   * Runs {@code text} through the analyzer configured for the clause's field (or a default field)
   * and stores the resulting terms in {@code clause.tokens}. When no analyzer matches, or the
   * analysis fails, the tokens are left untouched.
   */
  private void analyzeClause(Clause clause, String text, Set<String> defaultFields) {
    Analyzer analyzer = findField(clause.field, defaultFields, getFieldAnalyzerMaps());
    if (analyzer == null) {
      return;
    }
    try {
      clause.tokens = analyze(text, analyzer);
    } catch (IOException e) {
      // best effort: the clause is still usable without tokens
      LOG.warn("Analysis text:" + text + ": failed by " + analyzer.toString(), e);
    }
  }

  /**
   * Tells whether {@code ch} is one of the query-syntax characters that gets backslash-escaped
   * when it appears outside of a quoted phrase.
   */
  private static boolean isSpecialChar(char ch) {
    switch (ch) {
      case '!':
      case '(':
      case ')':
      case ':':
      case '^':
      case '[':
      case ']':
      case '{':
      case '}':
      case '~':
      case '*':
      case '?':
      case '"':
      case '+':
      case '-':
      case '\\':
      case '|':
      case '&':
      case '/':
        return true;
      default:
        return false;
    }
  }

  /**
   * Looks up the analyzer to use for a clause.
   *
   * @param field explicit field of the clause, or {@code null} if the clause has none
   * @param defaultFields fields tried in iteration order when {@code field} is {@code null}
   * @param fieldAnalyzerMaps field-name patterns mapped to analyzers; may be {@code null}
   * @return the first matching analyzer, or {@code null} if there is none
   */
  private Analyzer findField(String field, Set<String> defaultFields, Map<Pattern, Analyzer> fieldAnalyzerMaps) {
    if (fieldAnalyzerMaps == null || fieldAnalyzerMaps.isEmpty()) {
      return null;
    }
    if (field != null) {
      return findAnalyzer(field, fieldAnalyzerMaps);
    }
    if (defaultFields != null) {
      for (String defaultField : defaultFields) {
        Analyzer analyzer = findAnalyzer(defaultField, fieldAnalyzerMaps);
        if (analyzer != null) {
          return analyzer;
        }
      }
    }
    return null;
  }

  /**
   * Returns the analyzer of the first pattern (in map iteration order) that is found anywhere in
   * {@code field}, or {@code null} if no pattern matches.
   */
  private Analyzer findAnalyzer(String field, Map<Pattern, Analyzer> fieldAnalyzerMaps) {
    for (Map.Entry<Pattern, Analyzer> entry : fieldAnalyzerMaps.entrySet()) {
      if (entry.getKey().matcher(field).find()) {
        return entry.getValue();
      }
    }
    return null;
  }

  /**
   * Returns a field name from the current position of the string.
   *
   * <p>A field name is recognised only when a colon follows {@code pos}, the colon is followed by
   * a non-whitespace character, and everything between the (optional) leading {@code (},
   * {@code +} or {@code -} characters and the colon consists of Java identifier characters,
   * {@code -} or {@code .}.
   *
   * @param s the query string
   * @param pos position at which the candidate field name starts
   * @param end length of the query string
   * @return the field name, or {@code null} if there is no field name at {@code pos}
   */
  private String getFieldName(String s, int pos, int end) {
    if (pos >= end) {
      return null;
    }
    int p = pos;
    int colon = s.indexOf(':', pos);
    // make sure there is space after the colon, but not whitespace
    if (colon <= pos || colon + 1 >= end || Character.isWhitespace(s.charAt(colon + 1))) {
      return null;
    }
    char ch = s.charAt(p++);
    // skip leading grouping / operator characters; they are not part of the name
    while ((ch == '(' || ch == '+' || ch == '-') && (pos < end)) {
      ch = s.charAt(p++);
      pos++;
    }
    if (!Character.isJavaIdentifierPart(ch)) {
      return null;
    }
    while (p < colon) {
      ch = s.charAt(p++);
      if (!(Character.isJavaIdentifierPart(ch) || ch == '-' || ch == '.')) {
        return null;
      }
    }
    return s.substring(pos, p);
  }

  /**
   * Analyzes {@code text} with the given analyzer and returns the produced terms.
   *
   * @param text the text to analyze
   * @param analyzer the analyzer to run
   * @return the terms emitted by the analyzer, in order
   * @throws IOException if the token stream fails
   */
  protected String[] analyze(String text, Analyzer analyzer) throws IOException {
    List<String> result = new ArrayList<String>();
    TokenStream stream = analyzer.tokenStream("", new StringReader(text));
    try {
      CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
      stream.reset();
      while (stream.incrementToken()) {
        result.add(new String(termAtt.buffer(), 0, termAtt.length()));
      }
      stream.end();
    } finally {
      // always release the stream, even when analysis fails part-way
      stream.close();
    }

    return result.toArray(new String[result.size()]);
  }
}