package lucene4ir.similarity;

import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.SmallFloat;
import java.io.IOException;
import java.util.Collections;

/**
   Author: rup, [email protected]
   
   This is the bnn.bnn TFxIDF variant from SMART. The nomenclature is
   SMART's. See http://kak.tx0.org/IR/TFxIDF/ for the naming scheme
   and an encyclopedia of TFxIDF variants.

   This is a stripped-down version of one of Lucene's stock
   'similarity' classes, devoid of any checks for validity and
   safety, so use it carefully. The reduction is meant to make this
   beast more readable.
*/

/**
 * SMART bnn.bnn similarity: every matching term contributes a constant
 * score of 1.0, with no IDF weighting and no length normalisation applied
 * in {@link TFIDFScorer#score(int, float)}.
 *
 * <p>Collection statistics (an IDF value and an average document length)
 * are still computed in {@link #computeWeight} and carried by
 * {@link TFIDFWeight}, but the scorer of this variant does not read them.
 */
public class SMARTBNNBNNSimilarity extends Similarity {

    /** Creates the similarity; there is nothing to configure. */
    public SMARTBNNBNNSimilarity() {}

    /**
     * Base-2 logarithm.
     *
     * @param x the value to take the logarithm of
     * @return log2(x), narrowed to {@code float}
     */
    public float log(double x)
    {
        final double natural = Math.log(x);
        final double ln2 = Math.log(2.0D);
        return (float) (natural / ln2);
    }

    /**
     * Coordination factor; always 1 in this class.
     * NOTE(review): presumably meant to neutralise the coord hook of the
     * targeted Lucene version - confirm against the Lucene release in use.
     *
     * @param overlap    ignored
     * @param maxOverlap ignored
     * @return always {@code 1f}
     */
    public float coord(int overlap, int maxOverlap)
    {
        return 1f;
    }

    /**
     * Query normalisation factor; always 1 in this class.
     * NOTE(review): presumably meant to neutralise the queryNorm hook of
     * the targeted Lucene version - confirm against the release in use.
     *
     * @param valueForNormalization ignored
     * @return always {@code 1f}
     */
    public float queryNorm(float valueForNormalization)
    {
        return 1f;
    }

    /**
     * Builds the per-query weight holding the field name, an IDF value and
     * the average document length (sumTotalTermFreq / maxDoc).
     *
     * <p>With exactly one term the IDF is {@code log2(N / n)}. Otherwise
     * the per-term values are added onto an initial value of 1.0.
     * NOTE(review): starting the multi-term sum at 1.0 rather than 0 looks
     * unintended; it is kept as-is because {@link TFIDFScorer#score} never
     * reads the IDF in this variant.
     *
     * @param collectionStats statistics of the field being searched
     * @param termStats       statistics of the query term(s)
     * @return a {@link TFIDFWeight} for the field
     */
    @Override
    public final SimWeight computeWeight(CollectionStatistics collectionStats,
                                         TermStatistics... termStats)
    {
        final float N = collectionStats.maxDoc();
        final float adl = collectionStats.sumTotalTermFreq() / N;

        float idf = 1.0f;
        if (termStats.length == 1) {
            final float n = termStats[0].docFreq();
            idf = log(N / n);
        }
        else {
            for (final TermStatistics stat : termStats) {
                final float n = stat.docFreq();
                idf += log(N / n);
            }
        }

        return new TFIDFWeight(collectionStats.field(), idf, adl);
    }

    /**
     * Creates the scorer for one index segment.
     *
     * @param sw      the weight produced by {@link #computeWeight}; cast to
     *                {@link TFIDFWeight} without a type check
     * @param context the segment whose norm values are looked up
     * @return a {@link TFIDFScorer} bound to the weight and the norms
     * @throws IOException if the norm values cannot be read
     */
    @Override
    public final SimScorer simScorer(SimWeight sw, LeafReaderContext context)
        throws IOException
    {
        final TFIDFWeight weight = (TFIDFWeight) sw;
        // May be null when the field carries no norms; the scorer tolerates that.
        final NumericDocValues norms = context.reader().getNormValues(weight.field);
        return new TFIDFScorer(weight, norms);
    }

    /** Scorer for the bnn.bnn variant: a constant score per matching term. */
    public class TFIDFScorer extends SimScorer
    {
        // Both fields are kept so the constructor contract is unchanged;
        // score() of this variant does not consult either of them.
        private final TFIDFWeight tw;
        private final NumericDocValues norms;

        /**
         * @param tw    the weight for the query
         * @param norms norm values of the field for this segment; may be null
         * @throws IOException declared for compatibility with callers;
         *                     nothing in the constructor body throws it
         */
        TFIDFScorer(TFIDFWeight tw, NumericDocValues norms)
            throws IOException
        {
            this.tw    = tw;
            this.norms = norms;
        }

        /**
         * Returns the binary term weight.
         *
         * <p>The norm value is deliberately not read: it never influenced
         * the result, and reading it failed with a NullPointerException
         * whenever no norm values were available for the field.
         *
         * @param doc ignored
         * @param tf  ignored
         * @return always {@code 1.0f}
         */
        @Override
        public float score(int doc, float tf)
        {
            return 1.0f;
        }

        /**
         * @param distance the slop distance
         * @return {@code 1 / (distance + 1)}
         */
        @Override
        public float computeSlopFactor(int distance)
        {
            return 1.0f / (distance + 1);
        }

        /**
         * Payloads do not affect the score.
         *
         * @return always {@code 1.0f}
         */
        @Override
        public float computePayloadFactor(int doc, int start, int end, BytesRef payload)
        {
            return 1.0f;
        }
    }

    /** Immutable holder for the values computed in {@code computeWeight}. */
    public static class TFIDFWeight extends SimWeight
    {
        private final String field;
        private final float idf;
        private final float adl;

        /**
         * @param field name of the field being searched
         * @param idf   the IDF value computed for the query term(s)
         * @param adl   the average document length of the field
         */
        public TFIDFWeight(String field, float idf, float adl)
        {
            this.field = field;
            this.idf   = idf;
            this.adl   = adl;
        }

        /** @return always {@code 1.0f} */
        @Override
        public float getValueForNormalization()
        {
            return 1.0f;
        }

        /** Intentionally a no-op: query norm and boost are discarded. */
        @Override
        public void normalize(float queryNorm, float boost) {}
    }

    /**
     * Stores the field length reported by the invert state as the norm.
     *
     * @param state the indexing state of the field
     * @return {@code state.getLength()}
     */
    @Override
    public final long computeNorm(FieldInvertState state)
    {
        return state.getLength();
    }
}