/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.monitor;

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;

/**
 * A TermFilteredPresearcher that indexes queries multiple times, with terms collected
 * from different routes through a querytree.  Each route will produce a set of terms
 * that are *sufficient* to select the query, and are indexed into a separate, suffixed field.
 * <p>
 * Incoming documents are then converted to a set of Disjunction queries over each
 * suffixed field, and these queries are combined into a conjunction query, such that the
 * document's set of terms must match a term from each route.
 * <p>
 * This allows filtering out of documents that contain one half of a two-term phrase query, for
 * example.  The query {@code "hello world"} will be indexed twice, once under 'hello' and once
 * under 'world'.  A document containing the terms "hello there" would match the first field,
 * but not the second, and so would not be selected for matching.
 * <p>
 * The number of passes the presearcher makes is configurable.  More passes will improve the
 * selected/matched ratio, but will take longer to index and will use more RAM.
 * <p>
 * A minimum weight can be set for terms to be chosen for the second and subsequent passes.  This
 * allows users to avoid indexing stopwords, for example.
 */
public class MultipassTermFilteredPresearcher extends TermFilteredPresearcher {

  /** Number of times each query tree is walked; one suffixed field is written per pass. */
  private final int passes;

  /** Weight passed to {@code QueryTree.advancePhase} after each pass. */
  private final float minWeight;

  /**
   * Construct a new MultipassTermFilteredPresearcher
   *
   * @param passes        the number of times a query should be indexed; must be at least 1
   * @param minWeight     the minimum weight a querytree should be advanced over
   * @param weightor      the TreeWeightor to use
   * @param queryHandlers a list of custom query handlers
   * @param filterFields  a set of fields to use as filters
   * @throws IllegalArgumentException if {@code passes} is less than 1
   */
  public MultipassTermFilteredPresearcher(int passes, float minWeight, TermWeightor weightor,
                                          List<CustomQueryHandler> queryHandlers, Set<String> filterFields) {
    super(weightor, queryHandlers, filterFields);
    // With fewer than one pass no suffixed field is ever indexed or queried, so fail fast
    // rather than build a presearcher whose term filter is silently empty.
    if (passes < 1) {
      throw new IllegalArgumentException("passes must be at least 1, got " + passes);
    }
    this.passes = passes;
    this.minWeight = minWeight;
  }

  /**
   * Construct a new MultipassTermFilteredPresearcher using {@link TermFilteredPresearcher#DEFAULT_WEIGHTOR}
   * <p>
   * Note that this will be constructed with a minimum advance weight of zero
   *
   * @param passes     the number of times a query should be indexed; must be at least 1
   * @throws IllegalArgumentException if {@code passes} is less than 1
   */
  public MultipassTermFilteredPresearcher(int passes) {
    this(passes, 0, DEFAULT_WEIGHTOR, Collections.emptyList(), Collections.emptySet());
  }

  /**
   * Returns a fresh builder that turns a document's terms into one disjunction per pass,
   * all of which must match.
   */
  @Override
  protected DocumentQueryBuilder getQueryBuilder() {
    return new MultipassDocumentQueryBuilder();
  }

  /**
   * Builds the name of the suffixed field for a given pass, in the form {@code <field>_<pass>}.
   *
   * @param field the base field name
   * @param pass  the zero-based pass number
   * @return the suffixed field name
   */
  private static String field(String field, int pass) {
    return field + "_" + pass;
  }

  /**
   * Accumulates document terms per field and builds the multipass presearcher query from them.
   * <p>
   * This is an inner (non-static) class because {@link #build()} reads the enclosing
   * presearcher's pass count.
   */
  private class MultipassDocumentQueryBuilder implements DocumentQueryBuilder {

    /** Distinct terms seen so far, keyed by the (unsuffixed) field they were added under. */
    private final Map<String, BytesRefHash> terms = new HashMap<>();

    /**
     * Records a term for a field, creating the per-field hash on first use.
     *
     * @param field the field the term belongs to
     * @param term  the term bytes to record
     */
    @Override
    public void addTerm(String field, BytesRef term) {
      BytesRefHash t = terms.computeIfAbsent(field, f -> new BytesRefHash());
      t.add(term);
    }

    /**
     * Builds the presearcher query: for every pass, a boolean query with one SHOULD
     * {@link TermInSetQuery} per collected field (against that pass's suffixed field), with
     * the per-pass queries combined as MUST clauses of the returned query.
     *
     * @return the combined query
     */
    @Override
    public Query build() {
      // Convert each hash to an array once, then reuse the array for every pass.
      Map<String, BytesRef[]> collectedTerms = new HashMap<>();
      for (Map.Entry<String, BytesRefHash> entry : terms.entrySet()) {
        collectedTerms.put(entry.getKey(), convertHash(entry.getValue()));
      }
      BooleanQuery.Builder parent = new BooleanQuery.Builder();
      for (int pass = 0; pass < passes; pass++) {
        BooleanQuery.Builder child = new BooleanQuery.Builder();
        for (Map.Entry<String, BytesRef[]> entry : collectedTerms.entrySet()) {
          child.add(new TermInSetQuery(field(entry.getKey(), pass), entry.getValue()), BooleanClause.Occur.SHOULD);
        }
        parent.add(child.build(), BooleanClause.Occur.MUST);
      }
      return parent.build();
    }
  }

  /**
   * Builds the index document for a query tree by collecting its terms once per pass and
   * advancing the tree's phase (using the configured minimum weight) after each collection.
   * <p>
   * Note that the supplied tree is mutated by the calls to {@code advancePhase}.
   *
   * @param querytree the query tree to collect terms from
   * @return a document holding, for every pass and field, the collected terms under both the
   *         suffixed and the plain field name
   */
  @Override
  public Document buildQueryDocument(QueryTree querytree) {

    Document doc = new Document();

    for (int pass = 0; pass < passes; pass++) {
      Map<String, BytesRefHash> fieldTerms = collectTerms(querytree);
      for (Map.Entry<String, BytesRefHash> entry : fieldTerms.entrySet()) {
        String fieldName = entry.getKey();
        BytesRefHash termHash = entry.getValue();
        // we add the index terms once under a suffixed field for the multipass query, and
        // once under the plain field name for the TermsEnumTokenFilter
        doc.add(new Field(field(fieldName, pass),
            new TermsEnumTokenStream(new BytesRefHashIterator(termHash)), QUERYFIELDTYPE));
        doc.add(new Field(fieldName,
            new TermsEnumTokenStream(new BytesRefHashIterator(termHash)), QUERYFIELDTYPE));
      }
      querytree.advancePhase(minWeight);
    }

    return doc;
  }

  /**
   * Reads every entry of a {@link BytesRefHash} into an array, in id order (0 to size - 1).
   *
   * @param hash the hash to read
   * @return an array of {@code hash.size()} elements, each the result of
   *         {@code hash.get(id, ref)} called with its own newly allocated {@link BytesRef}
   */
  private static BytesRef[] convertHash(BytesRefHash hash) {
    BytesRef[] terms = new BytesRef[hash.size()];
    for (int i = 0; i < terms.length; i++) {
      terms[i] = hash.get(i, new BytesRef());
    }
    return terms;
  }

}