package de.cxp.predict;

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Queue;

import com.google.common.collect.Ordering;

import de.cxp.predict.api.DictionaryItem;
import de.cxp.predict.api.PreDictSettings;
import de.cxp.predict.api.SuggestItem;
import de.cxp.predict.customizing.PreDictCustomizing;

/**
 * Based on a SymSpell Port
 * 
 * Original SymSpell by:
 * Copyright (C) 2015 Wolf Garbe
 * Version: 3.0
 * Author: Wolf Garbe <[email protected]>
 * Maintainer: Wolf Garbe <[email protected]>
 * URL: http://blog.faroo.com/2012/06/07/improved-edit-distance-based-spelling-correction/
 * Description: http://blog.faroo.com/2012/06/07/improved-edit-distance-based-spelling-correction/
 * 
 * Changes by:
 * Copyright (C) 2017 CXP Commerce Experts
 * Version: 0.1.0
 * Author: Andreas Wagner <[email protected]>
 * Maintainer: Rudolf Batt <[email protected]>
 * URL: https://github.com/searchhub/preDict
 * 
 * License:
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License, 
 * version 3.0 (LGPL-3.0) as published by the Free Software Foundation.
 * http://www.opensource.org/licenses/LGPL-3.0
 */
public class PreDict {

	/**
	 * Controls how aggressively indexing and lookup prune suggestions.
	 * The declaration order of the constants is kept as is, because code
	 * outside this class may depend on their ordinals.
	 */
	public enum AccuracyLevel {
		/** Prunes like {@link #fast} and additionally limits the result to one suggestion. */
		topHit,
		/** Keeps only suggestions of the lowest distance found so far (faster, less complete). */
		fast,
		/** Keeps every suggestion within the maximum edit distance. */
		maximum;
	}

	/**
	 * Orders suggestions by ascending distance; among equal distances the
	 * higher count comes first. The distance result is doubled so that it
	 * always outweighs the count result (both compare calls yield -1, 0 or 1
	 * on the JDK implementations).
	 */
	private static final Comparator<SuggestItem> DISTANCE_COUNT_COMPARATOR =
			(x, y) -> 2 * Double.compare(x.distance, y.distance) - Integer.compare(x.count, y.count);

	private final PreDictCustomizing customizing;

	private final AccuracyLevel accuracyLevel;

	private final int editDistanceMax;

	// limit suggestion list to topK entries
	private final int topK;

	// weights of the single edit operations used by the distance functions
	private final double deletionWeight;

	private final double insertionWeight;

	private final double replaceWeight;

	private final double transpositionWeight;

	// Dictionary that contains both the original words and the deletes
	// (fragments) derived from them. A term might be both a word and a delete
	// of another word at the same time.
	// For space reduction a value is either a DictionaryItem or an Integer.
	// A DictionaryItem is used for words, word/delete combinations and deletes
	// with multiple suggestions. An Integer (index into wordlist) is used for
	// deletes with a single suggestion.
	private final HashMap<String, Object> dictionary = new HashMap<String, Object>();

	// List of unique words. The suggestion numbers stored in the dictionary
	// are indexes into this list and are translated back into the original
	// String through it.
	private final List<String> wordlist = new ArrayList<String>();

	// length of the longest word that was added to the dictionary
	private int maxlength = 0;

	/**
	 * Creates an empty dictionary that is configured by the given customizing.
	 *
	 * @param customizing
	 *        supplies the settings as well as the word cleaning and distance
	 *        adjustment hooks used during indexing and lookup
	 */
	public PreDict(PreDictCustomizing customizing) {
		this.customizing = customizing;

		// copy the settings into final fields, so that later changes of the
		// settings object do not affect this instance
		PreDictSettings settings = customizing.getSettings();
		editDistanceMax = settings.getEditDistanceMax();
		accuracyLevel = settings.getAccuracyLevel();
		topK = settings.getTopK();
		deletionWeight = settings.getDeletionWeight();
		insertionWeight = settings.getInsertionWeight();
		transpositionWeight = settings.getTranspositionWeight();
		replaceWeight = settings.getReplaceWeight();
	}

	/**
	 * Adds a word to the dictionary. The word is cleaned by the customizing
	 * first and its occurrence count is increased. The deletes of a word are
	 * generated only when the count reaches 1, that is the first time the term
	 * is seen as a word, even if the same term existed before as a delete of
	 * another word.
	 *
	 * @param word
	 *        the raw word to index
	 * @return always {@code true}
	 * @throws IllegalStateException
	 *         if the word list cannot take any more words
	 */
	public boolean indexWord(String word) {
		word = customizing.cleanIndexWord(word);
		DictionaryItem value = appendToDictionary(word);

		if (value.count == 1) {
			indexFragments(word);
		}
		return true;
	}

	/**
	 * Registers one occurrence of the word in the dictionary and returns its
	 * entry. An entry that was stored in the compact Integer form is replaced
	 * by a full DictionaryItem.
	 */
	private DictionaryItem appendToDictionary(String word) {
		Object existingEntry = dictionary.get(word);

		// this is a new word
		if (existingEntry == null) {
			if (wordlist.size() >= Integer.MAX_VALUE) {
				throw new IllegalStateException("can not index word since wordlist reached limit of Integer.MAX_VALUE");
			}
			DictionaryItem created = new DictionaryItem();
			created.count++;
			dictionary.put(word, created);
			maxlength = Math.max(maxlength, word.length());
			return created;
		}

		// known word or fragment
		DictionaryItem item = asDictionaryItem(existingEntry);
		if (existingEntry instanceof Integer) {
			dictionary.put(word, item);
		}

		// prevent overflow
		if (item.count < Integer.MAX_VALUE) {
			item.count++;
		}
		return item;
	}

	/**
	 * Appends the word to the word list and links each of its deletes
	 * (fragments) in the dictionary to the word's number.
	 */
	private void indexFragments(String word) {
		wordlist.add(word);
		int wordNr = wordlist.size() - 1;

		for (String fragment : getEdits(word, 0, new HashSet<String>())) {
			Object entry = dictionary.get(fragment);
			if (entry == null) {
				// first word producing this fragment: store the compact form
				dictionary.put(fragment, wordNr);
			} else if (entry instanceof Integer) {
				// the fragment is also a delete of one other word:
				// upgrade the compact form to a full item
				DictionaryItem promoted = asDictionaryItem(entry);
				dictionary.put(fragment, promoted);
				if (wordNr != (int) entry) {
					addLowestDistance(promoted, word, wordNr, fragment);
				}
			} else {
				// the fragment is a word itself or a delete of several words
				DictionaryItem item = (DictionaryItem) entry;
				if (!item.suggestions.contains(wordNr)) {
					addLowestDistance(item, word, wordNr, fragment);
				}
			}
		}
	}

	/**
	 * Returns the dictionary entry as DictionaryItem. An Integer entry is
	 * converted into a new item that carries this number as its only
	 * suggestion; the dictionary itself is not modified.
	 *
	 * @throws IllegalStateException
	 *         if the entry is neither a DictionaryItem nor an Integer
	 */
	private DictionaryItem asDictionaryItem(Object entry) {
		if (entry instanceof DictionaryItem) {
			return (DictionaryItem) entry;
		}
		if (entry instanceof Integer) {
			DictionaryItem dictItem = new DictionaryItem();
			dictItem.suggestions.add((int) entry);
			return dictItem;
		}
		throw new IllegalStateException("unknown entry type found: " + entry.getClass().getSimpleName());
	}

	/**
	 * Collects the deletes of the word into the given set: every string that
	 * results from removing single characters, recursively until
	 * editDistanceMax is reached. Words of length 1 produce no deletes.
	 * Only deletes are generated; no transposes, replaces or inserts.
	 *
	 * @param word
	 *        the term to derive deletes from
	 * @param editDistance
	 *        number of deletes already applied to reach {@code word}
	 * @param deletes
	 *        the set the deletes are added to
	 * @return the given set
	 */
	private HashSet<String> getEdits(String word, int editDistance, HashSet<String> deletes) {
		editDistance++;
		if (word.length() > 1) {
			for (int i = 0; i < word.length(); i++) {
				// delete ith character
				String delete = word.substring(0, i) + word.substring(i + 1);
				// recurse only for deletes that were not seen before and only
				// if the maximum edit distance is not yet reached
				if (deletes.add(delete) && editDistance < editDistanceMax) {
					getEdits(delete, editDistance, deletes);
				}
			}
		}
		return deletes;
	}

	/**
	 * Adds the word number to the suggestions of a fragment's item. Below
	 * {@link AccuracyLevel#maximum} only the words closest to the fragment
	 * (measured by length difference) are kept, which saves time and space.
	 */
	private void addLowestDistance(DictionaryItem item, String word, int wordNr, String fragment) {
		boolean keepAll = accuracyLevel == AccuracyLevel.maximum;

		// length difference between the fragment and the first word that is
		// already linked to it; -1 if there is none yet
		int indexedDistance = item.suggestions.size() > 0
				? wordlist.get(item.suggestions.get(0)).length() - fragment.length()
				: -1;
		int fragmentDistance = word.length() - fragment.length();

		// drop the existing suggestions if this word is closer to the fragment
		if (!keepAll && indexedDistance > fragmentDistance) {
			item.suggestions.clear();
		}

		// at maximum accuracy add the word anyway, otherwise only if it is
		// not further away than the words already linked
		if (keepAll || item.suggestions.size() == 0 || indexedDistance >= fragmentDistance) {
			item.suggestions.add(wordNr);
		}
	}

	/**
	 * Looks up the indexed words that are similar to the search query.
	 *
	 * @param searchQuery
	 *        the raw query; it is cleaned by the customizing before the lookup
	 * @return the terms of the picked suggestions, in the order in which the
	 *         customizing's final adjustment returned them
	 */
	public List<String> findSimilarWords(String searchQuery) {
		List<SuggestItem> suggestions = lookup(searchQuery, editDistanceMax);

		List<String> similarWords = new ArrayList<>(suggestions.size());
		for (SuggestItem suggestion : suggestions) {
			similarWords.add(suggestion.term);
		}
		return similarWords;
	}

	/**
	 * Collects the suggestions for the search word. Starting with the cleaned
	 * search word, candidates are processed first in, first out; each
	 * candidate is looked up in the dictionary and then expanded by its own
	 * deletes until editDistanceMax characters have been removed.
	 *
	 * @return the picked suggestions, or an empty list if the search word is
	 *         too long to match any indexed word
	 */
	private List<SuggestItem> lookup(String searchWord, int editDistanceMax) {
		String cleanedSearchWord = customizing.cleanSearchWord(searchWord);

		// save some time: even after the maximum number of deletes the search
		// word would be longer than every indexed word
		if (cleanedSearchWord.length() - editDistanceMax > maxlength) {
			return new ArrayList<SuggestItem>();
		}

		boolean exhaustive = accuracyLevel == AccuracyLevel.maximum;

		Queue<String> candidates = new ArrayDeque<String>();
		HashSet<String> candidatesUniq = new HashSet<String>();

		List<SuggestItem> suggestions = new ArrayList<SuggestItem>();
		HashSet<String> checkedWords = new HashSet<String>();

		// add original term
		candidates.add(cleanedSearchWord);

		while (!candidates.isEmpty()) {
			String candidate = candidates.poll();
			// number of characters deleted from the search word so far
			int candidateDistance = cleanedSearchWord.length() - candidate.length();

			// below maximum accuracy skip candidates that are already further
			// away than the first suggestion
			if (!exhaustive && !suggestions.isEmpty() && candidateDistance > suggestions.get(0).distance) {
				continue;
			}

			Object dictionaryEntry = dictionary.get(candidate);
			if (dictionaryEntry != null) {
				DictionaryItem matchedDictionaryItem = asDictionaryItem(dictionaryEntry);

				// count > 0 means the candidate is an indexed word itself, not
				// only a delete of one
				if ((matchedDictionaryItem.count > 0) && checkedWords.add(candidate)) {
					double distance = getMaxDistance(cleanedSearchWord, candidate);
					distance = customizing.adjustDistance(cleanedSearchWord, candidate, distance);

					if (distance <= editDistanceMax) {
						suggestions.add(newSuggestItem(candidate, matchedDictionaryItem.count, distance));
					}
					// early termination: the search word itself is indexed
					if (!exhaustive && candidateDistance == 0) {
						continue;
					}
				}

				// follow the links of the delete item to the indexed words
				for (int wordNr : matchedDictionaryItem.suggestions.toArray()) {
					String suggestion = wordlist.get(wordNr);
					// different deletes of the input term can lead to the same
					// suggestion, so every word is evaluated only once
					if (!checkedWords.add(suggestion)) {
						continue;
					}

					double distance = suggestionDistance(cleanedSearchWord, candidate, suggestion);

					if (!exhaustive && !suggestions.isEmpty()) {
						double bestDistance = suggestions.get(0).distance;
						if (bestDistance > distance) {
							// a closer word was found: drop the previous ones
							suggestions.clear();
						} else if (distance > bestDistance) {
							// further away than what was already found
							continue;
						}
					}

					// NOTE(review): the candidate (the delete fragment) is
					// passed here, not the suggestion the distance belongs to.
					// Kept as is; confirm against the adjustDistance contract.
					distance = customizing.adjustDistance(cleanedSearchWord, candidate, distance);

					if (distance <= editDistanceMax) {
						Object suggestedItem = dictionary.get(suggestion);
						if (suggestedItem != null) {
							suggestions.add(
									newSuggestItem(suggestion, ((DictionaryItem) suggestedItem).count, distance));
						}
					}
				}
			}

			// derive further deletes from the current candidate until the
			// maximum edit distance has been reached
			if (candidateDistance < editDistanceMax) {
				// save some time: below maximum accuracy do not create deletes
				// that cannot beat the suggestions already found
				if (!exhaustive && !suggestions.isEmpty() && candidateDistance >= suggestions.get(0).distance) {
					continue;
				}

				for (int i = 0; i < candidate.length(); i++) {
					String delete = candidate.substring(0, i) + candidate.substring(i + 1);
					if (candidatesUniq.add(delete)) {
						candidates.add(delete);
					}
				}
			}
		}

		return pickSuggestions(cleanedSearchWord, suggestions);
	}

	/**
	 * Calculates the distance between the search word and a suggestion that
	 * was reached through the given candidate (a delete of the search word).
	 *
	 * Deletes are allowed on both the dictionary term and the input term, so
	 * two terms can meet in the same delete although their real edit distance
	 * is higher than the number of deletes on either side. Example for a
	 * maximum edit distance of 1: bank==bnak and bank==bink, but bank!=kanb,
	 * bank!=xban and bank!=baxn. Therefore the real distance is calculated
	 * whenever characters were deleted on both sides.
	 */
	private double suggestionDistance(String searchWord, String candidate, String suggestion) {
		if (suggestion.equals(searchWord)) {
			return 0;
		}

		// Case 1: deletes happened on one side only, the length difference
		// is sufficient
		if (suggestion.length() == candidate.length()) {
			return getMaxDistance(searchWord, candidate);
		}
		if (searchWord.length() == candidate.length()) {
			return getMaxDistance(suggestion, candidate);
		}

		// Case 2: deletes on both sides. Common prefixes and suffixes are cut
		// off to shorten the distance calculation.
		int prefixLength = 0;
		while ((prefixLength < suggestion.length())
				&& (prefixLength < searchWord.length())
				&& (suggestion.charAt(prefixLength) == searchWord.charAt(prefixLength))) {
			prefixLength++;
		}

		int suffixLength = 0;
		while ((suffixLength < suggestion.length() - prefixLength)
				&& (suffixLength < searchWord.length() - prefixLength)
				&& (suggestion.charAt(suggestion.length() - suffixLength - 1)
						== searchWord.charAt(searchWord.length() - suffixLength - 1))) {
			suffixLength++;
		}

		// with prefixLength == 0 and suffixLength == 0 the substrings are the
		// complete strings
		double distance = cxpDamerauLevenshtein(
				searchWord.substring(prefixLength, searchWord.length() - suffixLength),
				suggestion.substring(prefixLength, suggestion.length() - suffixLength));
		return customizing.adjustDetailedDistance(searchWord, suggestion, distance, prefixLength, suffixLength);
	}

	/**
	 * Creates a suggestion for the term. The word frequency is the count
	 * divided by the number of dictionary entries (words and deletes).
	 */
	private SuggestItem newSuggestItem(String term, int count, double distance) {
		SuggestItem si = new SuggestItem();
		si.term = term;
		si.count = count;
		si.wordFrequency = ((double) si.count / dictionary.size());
		si.distance = distance;
		return si;
	}

	/**
	 * Limits the suggestions to a single one for
	 * {@link AccuracyLevel#topHit}, otherwise to topK entries, and passes the
	 * result to the customizing for the final adjustment.
	 *
	 * The suggestions are ordered by distance and count only if they have to
	 * be truncated; a list that is short enough is handed over in the order
	 * in which the suggestions were found.
	 */
	private List<SuggestItem> pickSuggestions(String searchWord, List<SuggestItem> suggestions) {
		int k = suggestions.size();
		if ((accuracyLevel == AccuracyLevel.topHit) && (suggestions.size() > 1)) {
			k = 1;
		} else if (suggestions.size() > topK) {
			k = topK;
		}

		List<SuggestItem> returnSuggestions = (k >= suggestions.size())
				? suggestions
				: Ordering.from(DISTANCE_COUNT_COMPARATOR).leastOf(suggestions, k);

		return customizing.adjustFinalResult(searchWord, returnSuggestions);
	}

	/**
	 * Simple and fast calculation of the edit distance that only compares the
	 * lengths of both strings and multiplies the difference with the deletion
	 * weight (if fromString is longer) or the insertion weight (otherwise).
	 *
	 * This is exact if one string can be derived from the other by deletes
	 * only. This method does not check that.
	 *
	 * @param fromString
	 *        the string the edit starts from
	 * @param toString
	 *        the string the edit leads to
	 * @return the weighted length difference
	 */
	private double getMaxDistance(String fromString, String toString) {
		int lengthDelta = fromString.length() - toString.length();
		if (lengthDelta > 0) {
			return deletionWeight * lengthDelta;
		}
		return insertionWeight * -lengthDelta;
	}

	/**
	 * Weighted edit distance for transforming a into b.
	 *
	 * Other than the textbook Damerau-Levenshtein distance, a cell with equal
	 * characters takes the minimum of its three neighbours without any cost,
	 * and the operation that is charged for unequal characters is chosen by
	 * the position in the matrix: replace (or transpose) on the diagonal,
	 * delete where the position in a is ahead of the position in b, insert
	 * where it is behind.
	 *
	 * Pure insertions and deletions (empty input, matrix borders) are weighted
	 * with the insertion and deletion weight, consistent with
	 * {@link #getMaxDistance(String, String)}.
	 *
	 * @param a
	 *        the string the edit starts from
	 * @param b
	 *        the string the edit leads to
	 * @return the weighted distance
	 */
	private double cxpDamerauLevenshtein(String a, String b) {
		// Step 1: one side is empty, only insertions or only deletions are
		// necessary; checked before the matrix is allocated
		if (a.length() == 0) return b.length() * insertionWeight;
		if (b.length() == 0) return a.length() * deletionWeight;

		double[][] d = new double[b.length() + 1][a.length() + 1]; // 2d matrix

		// Step 2: borders. Row 0 deletes the first i characters of a,
		// column 0 inserts the first j characters of b.
		for (int i = 0; i <= a.length(); i++) {
			d[0][i] = i * deletionWeight;
		}
		for (int j = 0; j <= b.length(); j++) {
			d[j][0] = j * insertionWeight;
		}

		// Step 3 + 4: fill the matrix row by row
		for (int j = 1; j <= b.length(); j++) {
			char b_j = b.charAt(j - 1);

			for (int i = 1; i <= a.length(); i++) {
				char a_i = a.charAt(i - 1);

				double min = min(
						d[j - 1][i - 1],
						d[j - 1][i],
						d[j][i - 1]);
				if (a_i == b_j) {
					d[j][i] = min;
				} else if (i == j) {
					d[j][i] = min + (replaceWeight * customizing.getReplacementDistance(b_j, a_i)); // replace

					// the two adjacent characters are swapped
					if (i > 1 && a_i == b.charAt(j - 2) && a.charAt(i - 2) == b_j) {
						d[j][i] = Math.min(d[j][i], d[j - 2][i - 2] + transpositionWeight); // transpose
					}
				} else if (i > j) {
					d[j][i] = min + deletionWeight; // delete
				} else {
					d[j][i] = min + insertionWeight; // insert
				}
			}
		}

		// Step 5
		return d[b.length()][a.length()];
	}

	/** Returns the smallest of the three values. */
	private double min(double a, double b, double c) {
		return Math.min(a, Math.min(b, c));
	}

	@Override
	public String toString() {
		return "PreDict " + customizing.toString();
	}
}