/*
 * Seldon -- open source prediction engine
 * =======================================
 *
 * Copyright 2011-2015 Seldon Technologies Ltd and Rummble Ltd (http://www.seldon.io/)
 *
 * ********************************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *        http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * ********************************************************************************************
 */

package io.seldon.semvec;

import java.util.Enumeration;
import java.util.LinkedList;
import java.util.ListIterator;
import java.util.Set;
import java.util.logging.Logger;

import pitt.search.semanticvectors.CompoundVectorBuilder;
import pitt.search.semanticvectors.FlagConfig;
import pitt.search.semanticvectors.LuceneUtils;
import pitt.search.semanticvectors.ObjectVector;
import pitt.search.semanticvectors.SearchResult;
import pitt.search.semanticvectors.VectorStore;
import pitt.search.semanticvectors.vectors.Vector;
import pitt.search.semanticvectors.vectors.ZeroVectorException;

/**
 * Base class for recommenders that rank the vectors of a search store against a
 * query, optionally filtering candidates by id and reweighting scores with
 * Lucene term statistics. Subclasses supply the scoring functions.
 */
public abstract class VectorStoreRecommender {
  private static final Logger logger =
      Logger.getLogger(VectorStoreRecommender.class.getCanonicalName());

  /**
   * Two equally scored candidates whose mutual score is within this distance of 1
   * are treated as duplicates of each other.
   */
  private static final double DUPLICATE_OVERLAP_TOLERANCE = 0.000001;

  private final VectorStore queryVecStore;
  private final VectorStore searchVecStore;
  private final LuceneUtils luceneUtils;
  private final Set<String> exclusions;
  private final Set<String> inclusions;
  private final String minDoc;

  /**
   * Assigns a relevance score to an individual candidate vector. Must be
   * implemented by each subclass.
   *
   * @param testVector candidate vector taken from the search store.
   * @return the relevance score of the candidate; higher ranks earlier.
   */
  public abstract double getScore(Vector testVector);

  /**
   * Scores two vectors against each other. {@link #getNearestNeighbors(int)} uses
   * this to decide whether two equally scored candidates are duplicates (a result
   * within {@code 0.000001} of 1 counts as a duplicate).
   *
   * @param v1 first vector.
   * @param v2 second vector.
   * @return the score of {@code v1} against {@code v2}.
   */
  public abstract double getScore(Vector v1, Vector v2);

  /**
   * Performs basic initialization; subclasses should normally call super() to use this.
   *
   * @param queryVecStore Vector store to use for query generation.
   * @param searchVecStore The vector store to search.
   * @param luceneUtils LuceneUtils object to use for query weighting. (May be null.)
   * @param exclusions ids that must never be returned. (May be null or empty for none.)
   * @param inclusions if non-empty, only these ids may be returned. (May be null or
   *     empty for no restriction.)
   * @param minDoc if non-null, candidates whose id sorts before this value (plain
   *     {@link String#compareTo(String)} ordering) are skipped.
   */
  public VectorStoreRecommender(VectorStore queryVecStore,
                                VectorStore searchVecStore,
                                LuceneUtils luceneUtils,
                                Set<String> exclusions,
                                Set<String> inclusions,
                                String minDoc) {
    this.queryVecStore = queryVecStore;
    this.searchVecStore = searchVecStore;
    this.luceneUtils = luceneUtils;
    this.exclusions = exclusions;
    this.inclusions = inclusions;
    this.minDoc = minDoc;
  }

  /**
   * Nearest neighbor search over every vector in the search store. It lives in
   * this abstract class so that all subclasses can reuse it whatever scoring
   * method they implement. The query is held by the subclass, so no query vector
   * is passed in.
   *
   * <p>Candidates are dropped by the exclusion, inclusion and {@code minDoc}
   * filters, scored with {@link #getScore(Vector)}, and (when a
   * {@code LuceneUtils} was supplied) multiplied by the global term weight of
   * their id. Every candidate, including the first, goes through the same
   * weighting and must score strictly above the current threshold (initially
   * {@code -1}) to be kept. A candidate whose score exactly equals that of an
   * already kept result and whose vector scores ~1 against it is counted as a
   * duplicate and discarded.
   *
   * @param numResults the number of results / maximum length of the result list.
   *     Values of zero or less yield an empty list.
   * @return the kept results ordered by descending score; never null.
   */
  public LinkedList<SearchResult> getNearestNeighbors(int numResults) {
    LinkedList<SearchResult> results = new LinkedList<>();
    if (numResults <= 0) {
      // Nothing can be kept; also avoids pruning an empty list further down.
      return results;
    }

    double threshold = -1;
    int duplicatesRemoved = 0;
    Enumeration<ObjectVector> vecEnum = searchVecStore.getAllVectors();

    while (vecEnum.hasMoreElements()) {
      ObjectVector testElement = vecEnum.nextElement();
      String id = testElement.getObject().toString();
      if (isFilteredOut(id)) {
        continue;
      }

      double score = getScore(testElement.getVector());

      // This is a way of using the Lucene Index to get term and
      // document frequency information to reweight all results. It
      // seems to be good at moving excessively common terms further
      // down the results. Note that using this means that scores
      // returned are no longer just cosine similarities.
      if (luceneUtils != null) {
        score = score * luceneUtils.getGlobalTermWeightFromString(id);
      }

      if (score <= threshold) {
        continue;
      }

      boolean added = false;
      boolean duplicate = false;
      // Results are kept in descending score order, so walking from the head
      // visits every equally scored entry before the insertion point.
      ListIterator<SearchResult> it = results.listIterator();
      while (it.hasNext() && !added && !duplicate) {
        SearchResult r = it.next();
        if (score == r.getScore()) {
          // Exact score ties are the only candidates for duplicate detection.
          ObjectVector rVec = (ObjectVector) r.getObjectVector();
          if (isDuplicate(rVec, testElement)) {
            duplicatesRemoved++;
            duplicate = true;
          }
        } else if (score > r.getScore()) {
          // Step back so the new result is inserted in front of r.
          it.previous();
          it.add(new SearchResult(score, testElement));
          added = true;
        }
      }

      if (results.size() > numResults) {
        // Prune the overflow and raise the bar for later candidates.
        results.removeLast();
        threshold = results.getLast().getScore();
      } else if (results.size() < numResults && !added && !duplicate) {
        // Lowest score so far and there is still room: append at the tail.
        results.add(new SearchResult(score, testElement));
      }
    }

    if (duplicatesRemoved > 0) {
      logger.info("removed "+duplicatesRemoved+" duplicates");
    }
    return results;
  }

  /** Returns true when the candidate id is rejected by the exclusion, inclusion or minDoc filter. */
  private boolean isFilteredOut(String id) {
    // Ignore excluded items.
    if (exclusions != null && exclusions.contains(id)) {
      return true;
    }
    // Only allow inclusions if any were specified.
    if (inclusions != null && !inclusions.isEmpty() && !inclusions.contains(id)) {
      return true;
    }
    // Ignore items whose id sorts below minDoc (assumes string ordering of doc ids is useful).
    return minDoc != null && id.compareTo(minDoc) < 0;
  }

  /** Returns true when the two vectors score within the duplicate tolerance of 1 against each other. */
  private boolean isDuplicate(ObjectVector existing, ObjectVector candidate) {
    double overlap = getScore(existing.getVector(), candidate.getVector());
    return Math.abs(overlap - 1) < DUPLICATE_OVERLAP_TOLERANCE;
  }


  /**
   * Recommender that scores candidates by their overlap with a single query
   * vector built from the query terms (cosine similarity, per the upstream
   * SemanticVectors naming; the measure itself is whatever
   * {@code Vector.measureOverlap} implements).
   */
  public static class VectorStoreRecommenderCosine extends VectorStoreRecommender {
    Vector queryVector;

    /**
     * @param queryVecStore Vector store to use for query generation.
     * @param searchVecStore The vector store to search.
     * @param luceneUtils LuceneUtils object to use for query weighting. (May be null.)
     * @param queryTerms Terms that will be parsed into a query
     * expression. If the string "NOT" appears, terms after this will be negated.
     * @param exclusions ids that must never be returned. (May be null or empty for none.)
     * @param inclusions if non-empty, only these ids may be returned. (May be null or
     *     empty for no restriction.)
     * @param minDoc if non-null, candidates whose id sorts before this value are skipped.
     * @throws ZeroVectorException if the query vector built from {@code queryTerms} is zero.
     */
    public VectorStoreRecommenderCosine(VectorStore queryVecStore,
                                        VectorStore searchVecStore,
                                        LuceneUtils luceneUtils,
                                        String[] queryTerms,
                                        Set<String> exclusions,
                                        Set<String> inclusions,
                                        String minDoc)
        throws ZeroVectorException {
      super(queryVecStore, searchVecStore, luceneUtils, exclusions, inclusions, minDoc);
      this.queryVector = CompoundVectorBuilder.getQueryVector(queryVecStore,
                                                              luceneUtils,
                                                              FlagConfig.getFlagConfig(null),
                                                              queryTerms);
      if (this.queryVector.isZeroVector()) {
        throw new ZeroVectorException("Query vector is zero ... no results.");
      }
    }

    /** Scores a candidate by its overlap with the query vector. */
    @Override
    public double getScore(Vector testVector) {
      return this.queryVector.measureOverlap(testVector);
    }

    /** Scores two arbitrary vectors by their mutual overlap. */
    @Override
    public double getScore(Vector v1, Vector v2) {
      return v1.measureOverlap(v2);
    }
  }

}