/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.search.join;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.TreeSet;

import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.DocValuesTermsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.DaciukMihovAutomatonBuilder;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocSet;

/**
 * A graph hit collector.  This accumulates the edges for a given graph traversal.
 * On each call to {@link #collect(int)}, the collector skips edge extraction for
 * nodes that it has already traversed.
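 * <p>
 * Typical usage, as a minimal sketch (the searcher, frontier query, and field variables
 * here are illustrative, not part of this class):
 * <pre>
 *   GraphEdgeCollector collector =
 *       new GraphEdgeCollector.GraphTermsCollector(collectField, visitedDocs, leafNodes);
 *   searcher.search(frontierQuery, collector);
 *   Query nextFrontier = collector.getResultQuery(matchField, false);  // null if no edges were collected
 * </pre>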
 * @lucene.internal
 */
abstract class GraphEdgeCollector extends SimpleCollector {
  // For graph traversal, the result set that has already been visited and can thus be skipped during value collection.
  DocSet skipSet;
  // known leaf nodes
  DocSet leafNodes;

  int numHits = 0;    // number of documents visited
  BitSet bits;  // if not null, used to collect documents visited

  int base;

  SchemaField collectField;

  // skipSet and leafNodes may be null
  GraphEdgeCollector(SchemaField collectField, DocSet skipSet, DocSet leafNodes) {
    this.collectField = collectField;
    this.skipSet = skipSet;
    this.leafNodes = leafNodes;
  }

  // Set to use to collect docs being visited
  // TODO: this should be replaced with a more general delegating collector
  public void setCollectDocs(FixedBitSet target) {
    this.bits = target;
  }

  // the number of docs visited
  public int getNumHits() {
    return numHits;
  }

  @Override
  public void collect(int segDoc) throws IOException {
    int doc = segDoc + base;
    if (skipSet != null && skipSet.exists(doc)) {
      // when skipSet contains all of the nodes visited so far, a hit here represents a
      // cycle; we could keep track of that here in the future if we need to.
      return;
    }

    if (bits != null) bits.set(doc);
    // increment the hit count so we know how many docs we traversed this time.
    numHits++;

    // Optimization to not look up edges for a document that is a leaf node (i.e. has no outgoing edges)
    if (leafNodes == null || !leafNodes.exists(doc)) {
      addEdgeIdsToResult(segDoc);
    }
    // Note: tracking links in for each result would be a huge memory hog... so not implementing at this time.
  }

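  /**
   * Collect the edge ids from the given document (a segment-local id) and accumulate
   * them so they can later be turned into the next frontier query.
   */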
  abstract void addEdgeIdsToResult(int doc) throws IOException;

  private void addDocToResult(int docWithBase) {
    // this document is part of the traversal. mark it in our bitmap if we are collecting one.
    if (bits != null) bits.set(docWithBase);
    // increment the hit count so we know how many docs we traversed this time.
    numHits++;
  }

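  // Record the segment's doc base so that segment-local ids can be converted to global ids.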
  @Override
  public void doSetNextReader(LeafReaderContext context) throws IOException {
    base = context.docBase;
  }

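  /**
   * Build a query that matches the edge ids collected so far against {@code matchField},
   * or null if no edge ids were collected.
   */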
  public abstract Query getResultQuery(SchemaField matchField, boolean useAutomaton);

  @Override
  public ScoreMode scoreMode() {
    return ScoreMode.COMPLETE_NO_SCORES;
  }

  static class GraphTermsCollector extends GraphEdgeCollector {
    // all the collected terms
    private BytesRefHash collectorTerms;
    private SortedSetDocValues docTermOrds;

    GraphTermsCollector(SchemaField collectField, DocSet skipSet, DocSet leafNodes) {
      super(collectField, skipSet, leafNodes);
      this.collectorTerms = new BytesRefHash();
    }

    @Override
    public void doSetNextReader(LeafReaderContext context) throws IOException {
      super.doSetNextReader(context);
      // Grab the updated doc values.
      docTermOrds = DocValues.getSortedSet(context.reader(), collectField.getName());
    }

    @Override
    void addEdgeIdsToResult(int doc) throws IOException {
      // Position the doc values iterator on the doc whose edge ids we want to pull.
      // SortedSetDocValues is a forward-only iterator, so only advance when behind the target.
      if (doc > docTermOrds.docID()) {
        docTermOrds.advance(doc);
      }
      if (doc == docTermOrds.docID()) {
        long ord;
        while ((ord = docTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
          // look up the term bytes for this ord; BytesRefHash copies the bytes on add,
          // so it is safe that lookupOrd may reuse the returned BytesRef.
          BytesRef edgeValue = docTermOrds.lookupOrd(ord);
          // add the edge id to the collector terms.
          collectorTerms.add(edgeValue);
        }
      }
    }

    @Override
    public Query getResultQuery(SchemaField matchField, boolean useAutomaton) {
      if (collectorTerms == null || collectorTerms.size() == 0) {
        // return null if there are no terms (edges) to traverse.
        return null;
      } else {
        // Build a query that matches the collected frontier terms.
        Query q = null;

        // TODO: see if we should dynamically select this based on the frontier size.
        if (useAutomaton) {
          // build an automaton-based query for the frontier.
          Automaton autn = buildAutomaton(collectorTerms);
          q = new AutomatonQuery(new Term(matchField.getName()), autn);
        } else {
          List<BytesRef> termList = new ArrayList<>(collectorTerms.size());
          for (int i = 0; i < collectorTerms.size(); i++) {
            BytesRef ref = new BytesRef();
            collectorTerms.get(i, ref);
            termList.add(ref);
          }
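          // Prefer a doc-values-based query when the match field has docValues but is
          // not indexed; otherwise match against the indexed terms.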
          q = (matchField.hasDocValues() && !matchField.indexed())
                  ? new DocValuesTermsQuery(matchField.getName(), termList)
                  : new TermInSetQuery(matchField.getName(), termList);
        }

        return q;
      }
    }

    /**
     * Build an automaton to represent the frontier query
     */
    private Automaton buildAutomaton(BytesRefHash termBytesHash) {
      // need to pass a sorted set of terms to the automaton builder (maybe there's a better way to avoid this?)
      final TreeSet<BytesRef> terms = new TreeSet<>();
      for (int i = 0; i < termBytesHash.size(); i++) {
        BytesRef ref = new BytesRef();
        termBytesHash.get(i, ref);
        terms.add(ref);
      }
      return DaciukMihovAutomatonBuilder.build(terms);
    }

  }
}