/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler;

import java.io.IOException;
import java.io.Reader;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.ExitableDirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.mlt.MoreLikeThis;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.StringUtils;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.MoreLikeThisParams.TermStyle;
import org.apache.solr.common.params.MoreLikeThisParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.handler.component.FacetComponent;
import org.apache.solr.request.SimpleFacets;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.DocListAndSet;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QParserPlugin;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.search.ReturnFields;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SolrQueryTimeoutImpl;
import org.apache.solr.search.SolrReturnFields;
import org.apache.solr.search.SortSpec;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.util.SolrPluginUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Solr MoreLikeThis --
 * 
 * Return similar documents either based on a single document or based on posted text.
 * 
 * @since solr 1.3
 */
public class MoreLikeThisHandler extends RequestHandlerBase  
{
  // Pattern is thread safe -- TODO? share this with general 'fl' param
  private static final Pattern splitList = Pattern.compile(",| ");

  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  static final String ERR_MSG_QUERY_OR_TEXT_REQUIRED =
      "MoreLikeThis requires either a query (?q=) or text to find similar documents.";

  static final String ERR_MSG_SINGLE_STREAM_ONLY =
      "MoreLikeThis does not support multiple ContentStreams";

  @Override
  public void init(@SuppressWarnings({"rawtypes"})NamedList args) {
    super.init(args);
  }

  @Override
  public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception 
  {
    SolrParams params = req.getParams();

    long timeAllowed = (long)params.getInt( CommonParams.TIME_ALLOWED, -1 );
    if(timeAllowed > 0) {
      SolrQueryTimeoutImpl.set(timeAllowed);
    }
      try {

        // Set field flags
        ReturnFields returnFields = new SolrReturnFields(req);
        rsp.setReturnFields(returnFields);
        int flags = 0;
        if (returnFields.wantsScore()) {
          flags |= SolrIndexSearcher.GET_SCORES;
        }

        String defType = params.get(QueryParsing.DEFTYPE, QParserPlugin.DEFAULT_QTYPE);
        String q = params.get(CommonParams.Q);
        Query query = null;
        SortSpec sortSpec = null;
        List<Query> filters = null;

        try {
          if (q != null) {
            QParser parser = QParser.getParser(q, defType, req);
            query = parser.getQuery();
            sortSpec = parser.getSortSpec(true);
          }

          String[] fqs = req.getParams().getParams(CommonParams.FQ);
          if (fqs != null && fqs.length != 0) {
            filters = new ArrayList<>();
            for (String fq : fqs) {
              if (fq != null && fq.trim().length() != 0) {
                QParser fqp = QParser.getParser(fq, req);
                filters.add(fqp.getQuery());
              }
            }
          }
        } catch (SyntaxError e) {
          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
        }

        SolrIndexSearcher searcher = req.getSearcher();

        MoreLikeThisHelper mlt = new MoreLikeThisHelper(params, searcher);

        // Hold on to the interesting terms if relevant
        TermStyle termStyle = TermStyle.get(params.get(MoreLikeThisParams.INTERESTING_TERMS));
        List<InterestingTerm> interesting = (termStyle == TermStyle.NONE)
            ? null : new ArrayList<>(mlt.mlt.getMaxQueryTerms());

        DocListAndSet mltDocs = null;

        // Parse Required Params
        // This will either have a single Reader or valid query
        Reader reader = null;
        try {
          if (q == null || q.trim().length() < 1) {
            Iterable<ContentStream> streams = req.getContentStreams();
            if (streams != null) {
              Iterator<ContentStream> iter = streams.iterator();
              if (iter.hasNext()) {
                reader = iter.next().getReader();
              }
              if (iter.hasNext()) {
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                    ERR_MSG_SINGLE_STREAM_ONLY);
              }
            }
          }

          int start = params.getInt(CommonParams.START, CommonParams.START_DEFAULT);
          int rows = params.getInt(CommonParams.ROWS, CommonParams.ROWS_DEFAULT);

          // Find documents MoreLikeThis - either with a reader or a query
          // --------------------------------------------------------------------------------
          if (reader != null) {
            mltDocs = mlt.getMoreLikeThis(reader, start, rows, filters,
                interesting, flags);
          } else if (q != null) {
            // Matching options
            boolean includeMatch = params.getBool(MoreLikeThisParams.MATCH_INCLUDE,
                true);
            int matchOffset = params.getInt(MoreLikeThisParams.MATCH_OFFSET, 0);
            // Find the base match
            DocList match = searcher.getDocList(query, null, null, matchOffset, 1,
                flags); // only get the first one...
            if (includeMatch) {
              rsp.add("match", match);
            }

            // This is an iterator, but we only handle the first match
            DocIterator iterator = match.iterator();
            if (iterator.hasNext()) {
              // do a MoreLikeThis query for each document in results
              int id = iterator.nextDoc();
              mltDocs = mlt.getMoreLikeThis(id, start, rows, filters, interesting,
                  flags);
            }
          } else {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                ERR_MSG_QUERY_OR_TEXT_REQUIRED);
          }

        } finally {
          if (reader != null) {
            reader.close();
          }
        }

        if (mltDocs == null) {
          mltDocs = new DocListAndSet(); // avoid NPE
        }
        rsp.addResponse(mltDocs.docList);


        if (interesting != null) {
          if (termStyle == TermStyle.DETAILS) {
            NamedList<Float> it = new NamedList<>();
            for (InterestingTerm t : interesting) {
              it.add(t.term.toString(), t.boost);
            }
            rsp.add("interestingTerms", it);
          } else {
            List<String> it = new ArrayList<>(interesting.size());
            for (InterestingTerm t : interesting) {
              it.add(t.term.text());
            }
            rsp.add("interestingTerms", it);
          }
        }

        // maybe facet the results
        if (params.getBool(FacetParams.FACET, false)) {
          if (mltDocs.docSet == null) {
            rsp.add("facet_counts", null);
          } else {
            SimpleFacets f = new SimpleFacets(req, mltDocs.docSet, params);
            rsp.add("facet_counts", FacetComponent.getFacetCounts(f));
          }
        }
        boolean dbg = req.getParams().getBool(CommonParams.DEBUG_QUERY, false);

        boolean dbgQuery = false, dbgResults = false;
        if (dbg == false) {//if it's true, we are doing everything anyway.
          String[] dbgParams = req.getParams().getParams(CommonParams.DEBUG);
          if (dbgParams != null) {
            for (String dbgParam : dbgParams) {
              if (dbgParam.equals(CommonParams.QUERY)) {
                dbgQuery = true;
              } else if (dbgParam.equals(CommonParams.RESULTS)) {
                dbgResults = true;
              }
            }
          }
        } else {
          dbgQuery = true;
          dbgResults = true;
        }
        // TODO resolve duplicated code with DebugComponent.  Perhaps it should be added to doStandardDebug?
        if (dbg == true) {
          try {
            @SuppressWarnings({"unchecked"})
            NamedList<Object> dbgInfo = SolrPluginUtils.doStandardDebug(req, q, mlt.getRawMLTQuery(), mltDocs.docList, dbgQuery, dbgResults);
            if (null != dbgInfo) {
              if (null != filters) {
                dbgInfo.add("filter_queries", req.getParams().getParams(CommonParams.FQ));
                List<String> fqs = new ArrayList<>(filters.size());
                for (Query fq : filters) {
                  fqs.add(QueryParsing.toString(fq, req.getSchema()));
                }
                dbgInfo.add("parsed_filter_queries", fqs);
              }
              rsp.add("debug", dbgInfo);
            }
          } catch (Exception e) {
            SolrException.log(log, "Exception during debug", e);
            rsp.add("exception_during_debug", SolrException.toStr(e));
          }
        }
      } catch (ExitableDirectoryReader.ExitingReaderException ex) {
        log.warn( "Query: {}; {}", req.getParamString(), ex.getMessage());
      } finally {
        SolrQueryTimeoutImpl.reset();
      }
  }
  
  public static class InterestingTerm
  {
    public Term term;
    public float boost;

  }
  
  /**
   * Helper class for MoreLikeThis that can be called from other request handlers
   */
  public static class MoreLikeThisHelper 
  { 
    final SolrIndexSearcher searcher;
    final MoreLikeThis mlt;
    final IndexReader reader;
    final SchemaField uniqueKeyField;
    final boolean needDocSet;
    Map<String,Float> boostFields;
    
    public MoreLikeThisHelper( SolrParams params, SolrIndexSearcher searcher )
    {
      this.searcher = searcher;
      this.reader = searcher.getIndexReader();
      this.uniqueKeyField = searcher.getSchema().getUniqueKeyField();
      this.needDocSet = params.getBool(FacetParams.FACET,false);
      
      SolrParams required = params.required();
      String[] fl = required.getParams(MoreLikeThisParams.SIMILARITY_FIELDS);
      List<String> list = new ArrayList<>();
      for (String f : fl) {
        if (!StringUtils.isEmpty(f))  {
          String[] strings = splitList.split(f);
          for (String string : strings) {
            if (!StringUtils.isEmpty(string)) {
              list.add(string);
            }
          }
        }
      }
      String[] fields = list.toArray(new String[list.size()]);
      if( fields.length < 1 ) {
        throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, 
            "MoreLikeThis requires at least one similarity field: "+MoreLikeThisParams.SIMILARITY_FIELDS );
      }
      
      this.mlt = new MoreLikeThis( reader ); // TODO -- after LUCENE-896, we can use , searcher.getSimilarity() );
      mlt.setFieldNames(fields);
      mlt.setAnalyzer( searcher.getSchema().getIndexAnalyzer() );
      
      // configurable params
      
      mlt.setMinTermFreq(       params.getInt(MoreLikeThisParams.MIN_TERM_FREQ,         MoreLikeThis.DEFAULT_MIN_TERM_FREQ));
      mlt.setMinDocFreq(        params.getInt(MoreLikeThisParams.MIN_DOC_FREQ,          MoreLikeThis.DEFAULT_MIN_DOC_FREQ));
      mlt.setMaxDocFreq(        params.getInt(MoreLikeThisParams.MAX_DOC_FREQ,          MoreLikeThis.DEFAULT_MAX_DOC_FREQ));
      mlt.setMinWordLen(        params.getInt(MoreLikeThisParams.MIN_WORD_LEN,          MoreLikeThis.DEFAULT_MIN_WORD_LENGTH));
      mlt.setMaxWordLen(        params.getInt(MoreLikeThisParams.MAX_WORD_LEN,          MoreLikeThis.DEFAULT_MAX_WORD_LENGTH));
      mlt.setMaxQueryTerms(     params.getInt(MoreLikeThisParams.MAX_QUERY_TERMS,       MoreLikeThis.DEFAULT_MAX_QUERY_TERMS));
      mlt.setMaxNumTokensParsed(params.getInt(MoreLikeThisParams.MAX_NUM_TOKENS_PARSED, MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED));
      mlt.setBoost(            params.getBool(MoreLikeThisParams.BOOST, false ) );
      
      // There is no default for maxDocFreqPct. Also, it's a bit oddly expressed as an integer value 
      // (percentage of the collection's documents count). We keep Lucene's convention here. 
      if (params.getInt(MoreLikeThisParams.MAX_DOC_FREQ_PCT) != null) {
        mlt.setMaxDocFreqPct(params.getInt(MoreLikeThisParams.MAX_DOC_FREQ_PCT));
      }

      boostFields = SolrPluginUtils.parseFieldBoosts(params.getParams(MoreLikeThisParams.QF));
    }
    
    private Query rawMLTQuery;
    private Query boostedMLTQuery;
    private BooleanQuery realMLTQuery;
    
    public Query getRawMLTQuery(){
      return rawMLTQuery;
    }
    
    public Query getBoostedMLTQuery(){
      return boostedMLTQuery;
    }
    
    public Query getRealMLTQuery(){
      return realMLTQuery;
    }
    
    private Query getBoostedQuery(Query mltquery) {
      BooleanQuery boostedQuery = (BooleanQuery)mltquery;
      if (boostFields.size() > 0) {
        BooleanQuery.Builder newQ = new BooleanQuery.Builder();
        newQ.setMinimumNumberShouldMatch(boostedQuery.getMinimumNumberShouldMatch());
        for (BooleanClause clause : boostedQuery) {
          Query q = clause.getQuery();
          float originalBoost = 1f;
          if (q instanceof BoostQuery) {
            BoostQuery bq = (BoostQuery) q;
            q = bq.getQuery();
            originalBoost = bq.getBoost();
          }
          Float fieldBoost = boostFields.get(((TermQuery) q).getTerm().field());
          q = ((fieldBoost != null) ? new BoostQuery(q, fieldBoost * originalBoost) : clause.getQuery());
          newQ.add(q, clause.getOccur());
        }
        boostedQuery = newQ.build();
      }
      return boostedQuery;
    }
    
    public DocListAndSet getMoreLikeThis( int id, int start, int rows, List<Query> filters, List<InterestingTerm> terms, int flags ) throws IOException
    {
      Document doc = reader.document(id);
      rawMLTQuery = mlt.like(id);
      boostedMLTQuery = getBoostedQuery( rawMLTQuery );
      if( terms != null ) {
        fillInterestingTermsFromMLTQuery( boostedMLTQuery, terms );
      }

      // exclude current document from results
      BooleanQuery.Builder realMLTQuery = new BooleanQuery.Builder();
      realMLTQuery.add(boostedMLTQuery, BooleanClause.Occur.MUST);
      realMLTQuery.add(
          new TermQuery(new Term(uniqueKeyField.getName(), uniqueKeyField.getType().storedToIndexed(doc.getField(uniqueKeyField.getName())))), 
            BooleanClause.Occur.MUST_NOT);
      this.realMLTQuery = realMLTQuery.build();
      
      DocListAndSet results = new DocListAndSet();
      if (this.needDocSet) {
        results = searcher.getDocListAndSet(this.realMLTQuery, filters, null, start, rows, flags);
      } else {
        results.docList = searcher.getDocList(this.realMLTQuery, filters, null, start, rows, flags);
      }
      return results;
    }

    public DocListAndSet getMoreLikeThis( Reader reader, int start, int rows, List<Query> filters, List<InterestingTerm> terms, int flags ) throws IOException
    {
      // SOLR-5351: if only check against a single field, use the reader directly. Otherwise we
      // repeat the stream's content for multiple fields so that query terms can be pulled from any
      // of those fields.
      String [] fields = mlt.getFieldNames();
      if (fields.length == 1) {
        rawMLTQuery = mlt.like(fields[0], reader);
      } else {
        CharsRefBuilder buffered = new CharsRefBuilder();
        char [] chunk = new char [1024];
        int len;
        while ((len = reader.read(chunk)) >= 0) {
          buffered.append(chunk, 0, len);
        }

        Collection<Object> streamValue = Collections.singleton(buffered.get().toString());
        Map<String, Collection<Object>> multifieldDoc = new HashMap<>(fields.length);
        for (String field : fields) {
          multifieldDoc.put(field, streamValue);
        }

        rawMLTQuery = mlt.like(multifieldDoc);
      }

      boostedMLTQuery = getBoostedQuery( rawMLTQuery );
      if (terms != null) {
        fillInterestingTermsFromMLTQuery( boostedMLTQuery, terms );
      }
      DocListAndSet results = new DocListAndSet();
      if (this.needDocSet) {
        results = searcher.getDocListAndSet( boostedMLTQuery, filters, null, start, rows, flags);
      } else {
        results.docList = searcher.getDocList( boostedMLTQuery, filters, null, start, rows, flags);
      }
      return results;
    }

    public NamedList<BooleanQuery> getMoreLikeTheseQuery(DocList docs)
        throws IOException {
      IndexSchema schema = searcher.getSchema();
      NamedList<BooleanQuery> result = new NamedList<>();
      DocIterator iterator = docs.iterator();
      while (iterator.hasNext()) {
        int id = iterator.nextDoc();
        String uniqueId = schema.printableUniqueKey(reader.document(id));

        BooleanQuery mltquery = (BooleanQuery) mlt.like(id);
        if (mltquery.clauses().size() == 0) {
          return result;
        }
        mltquery = (BooleanQuery) getBoostedQuery(mltquery);
        
        // exclude current document from results
        BooleanQuery.Builder mltQuery = new BooleanQuery.Builder();
        mltQuery.add(mltquery, BooleanClause.Occur.MUST);
        
        mltQuery.add(
            new TermQuery(new Term(uniqueKeyField.getName(), uniqueId)), BooleanClause.Occur.MUST_NOT);
        result.add(uniqueId, mltQuery.build());
      }

      return result;
    }
    
    private void fillInterestingTermsFromMLTQuery( Query query, List<InterestingTerm> terms )
    { 
      Collection<BooleanClause> clauses = ((BooleanQuery)query).clauses();
      for( BooleanClause o : clauses ) {
        Query q = o.getQuery();
        float boost = 1f;
        if (q instanceof BoostQuery) {
          BoostQuery bq = (BoostQuery) q;
          q = bq.getQuery();
          boost = bq.getBoost();
        }
        InterestingTerm it = new InterestingTerm();
        it.boost = boost;
        it.term = ((TermQuery) q).getTerm();
        terms.add( it );
      } 
      // alternatively we could use
      // mltquery.extractTerms( terms );
    }
    
    public MoreLikeThis getMoreLikeThis()
    {
      return mlt;
    }
  }
  
  
  //////////////////////// SolrInfoMBeans methods //////////////////////

  @Override
  public String getDescription() {
    return "Solr MoreLikeThis";
  }
}