import os.path
import pickle
import sys
import unittest

import pkg_resources
import pytest

from symspellpy import SymSpell, Verbosity
from symspellpy.symspellpy import SuggestItem

class TestSymSpellPy(unittest.TestCase):
    """Tests for dictionary building, lookup, compound lookup and word
    segmentation of ``SymSpell``.
    """
    # Unigram frequency dictionary resolved from the symspellpy package.
    dictionary_path = pkg_resources.resource_filename(
        "symspellpy", "frequency_dictionary_en_82_765.txt")
    # Bigram frequency dictionary resolved from the symspellpy package.
    bigram_path = pkg_resources.resource_filename(
        "symspellpy", "frequency_bigramdictionary_en_243_342.txt")
    # Directory named "fortests" located next to this test module; the tests
    # read their fixture files from it.
    fortests_path = os.path.join(os.path.dirname(__file__), "fortests")

    def test_negative_max_dictionary_edit_distance(self):
        """Reject a negative max_dictionary_edit_distance with ValueError."""
        expected_message = "max_dictionary_edit_distance cannot be negative"
        with pytest.raises(ValueError) as raised:
            SymSpell(-1, 3)
        self.assertEqual(expected_message, str(raised.value))

    def test_invalid_prefix_length(self):
        """Reject every invalid prefix_length with the same ValueError."""
        expected_message = ("prefix_length cannot be less than 1 or "
                            "smaller than max_dictionary_edit_distance")
        # (max_dictionary_edit_distance, prefix_length) pairs. The first two
        # have prefix_length < 1; the last one has
        # prefix_length <= max_dictionary_edit_distance.
        invalid_arguments = ((1, 0), (1, -1), (2, 2))
        for max_distance, prefix_length in invalid_arguments:
            with pytest.raises(ValueError) as raised:
                SymSpell(max_distance, prefix_length)
            self.assertEqual(expected_message, str(raised.value))

    def test_negative_count_threshold(self):
        """Reject a negative count_threshold with ValueError."""
        expected_message = "count_threshold cannot be negative"
        with pytest.raises(ValueError) as raised:
            SymSpell(1, 3, -1)
        self.assertEqual(expected_message, str(raised.value))

    def test_create_dictionary_entry_negative_count(self):
        """Counts 0 and -1 are refused with the default count_threshold;
        count 0 is accepted once count_threshold is 0.
        """
        default_threshold = SymSpell(1, 3)
        for count in (0, -1):
            added = default_threshold.create_dictionary_entry("pipe", count)
            self.assertEqual(False, added)

        zero_threshold = SymSpell(1, 3, count_threshold=0)
        added = zero_threshold.create_dictionary_entry("pipe", 0)
        self.assertEqual(True, added)

    def test_create_dictionary_entry_below_threshold(self):
        """Counts accumulate in below_threshold_words until the word leaves
        that mapping.
        """
        sym_spell = SymSpell(1, 3, count_threshold=10)

        # Two additions of 4 keep "pipe" under the threshold of 10.
        for expected_total in (4, 8):
            sym_spell.create_dictionary_entry("pipe", 4)
            pending = sym_spell.below_threshold_words
            self.assertEqual(1, len(pending))
            self.assertEqual(expected_total, pending["pipe"])

        # After the third addition the word is no longer tracked as being
        # below the threshold.
        sym_spell.create_dictionary_entry("pipe", 4)
        self.assertEqual(0, len(sym_spell.below_threshold_words))

    def test_deletes(self):
        """Adding entries fills ``deletes`` and TOP lookup finds "steamb"."""
        sym_spell = SymSpell()
        for term, count in (("steama", 4), ("steamb", 6), ("steamc", 2)):
            sym_spell.create_dictionary_entry(term, count)

        suggestions = sym_spell.lookup("stream", Verbosity.TOP, 2)
        self.assertEqual(1, len(suggestions))
        best = suggestions[0]
        self.assertEqual("steamb", best.term)
        self.assertEqual(6, best.count)
        self.assertTrue(len(sym_spell.deletes))

    def test_words_with_shared_prefix_should_retain_counts(self):
        """Words sharing a prefix keep their own counts in lookup results."""
        sym_spell = SymSpell(1, 3)
        sym_spell.create_dictionary_entry("pipe", 5)
        sym_spell.create_dictionary_entry("pips", 10)

        # query -> expected (term, count) suggestions, in result order
        expectations = (
            ("pipe", (("pipe", 5), ("pips", 10))),
            ("pips", (("pips", 10), ("pipe", 5))),
            ("pip", (("pips", 10), ("pipe", 5))),
        )
        for query, expected in expectations:
            suggestions = sym_spell.lookup(query, Verbosity.ALL, 1)
            self.assertEqual(2, len(suggestions))
            for suggestion, (term, count) in zip(suggestions, expected):
                self.assertEqual(term, suggestion.term)
                self.assertEqual(count, suggestion.count)

    def test_add_additional_counts_should_not_add_word_again(self):
        """Re-adding the same word leaves word_count at 1."""
        sym_spell = SymSpell()
        for extra_count in (11, 3):
            sym_spell.create_dictionary_entry("hello", extra_count)
            self.assertEqual(1, sym_spell.word_count)

    def test_add_additional_counts_should_increase_count(self):
        """Each additional count for a word is added to its stored count."""
        sym_spell = SymSpell()
        word = "hello"
        running_total = 0
        for increment in (11, 3):
            sym_spell.create_dictionary_entry(word, increment)
            running_total += increment

            suggestions = sym_spell.lookup(word, Verbosity.TOP)
            # Anything other than exactly one suggestion counts as 0.
            if len(suggestions) == 1:
                observed = suggestions[0].count
            else:
                observed = 0
            self.assertEqual(running_total, observed)

    def test_add_additional_counts_should_not_overflow(self):
        """A count that would pass sys.maxsize is reported as sys.maxsize."""
        sym_spell = SymSpell()
        word = "hello"
        # (count to add, expected stored count afterwards); the second
        # addition would mathematically give sys.maxsize + 1.
        steps = (
            (sys.maxsize - 10, sys.maxsize - 10),
            (11, sys.maxsize),
        )
        for added, expected in steps:
            sym_spell.create_dictionary_entry(word, added)
            suggestions = sym_spell.lookup(word, Verbosity.TOP)
            if len(suggestions) == 1:
                observed = suggestions[0].count
            else:
                observed = 0
            self.assertEqual(expected, observed)

    def test_verbosity_should_control_lookup_results(self):
        """TOP, CLOSEST and ALL return 1, 2 and 3 suggestions for "steems"."""
        sym_spell = SymSpell()
        for term, count in (("steam", 1), ("steams", 2), ("steem", 3)):
            sym_spell.create_dictionary_entry(term, count)

        expected_sizes = (
            (Verbosity.TOP, 1),
            (Verbosity.CLOSEST, 2),
            (Verbosity.ALL, 3),
        )
        for verbosity, expected_size in expected_sizes:
            suggestions = sym_spell.lookup("steems", verbosity, 2)
            self.assertEqual(expected_size, len(suggestions))

    def test_lookup_should_return_most_frequent(self):
        """TOP lookup of "stream" returns "steamb", the highest count."""
        sym_spell = SymSpell()
        entries = {"steama": 4, "steamb": 6, "steamc": 2}
        for term, count in entries.items():
            sym_spell.create_dictionary_entry(term, count)

        suggestions = sym_spell.lookup("stream", Verbosity.TOP, 2)
        self.assertEqual(1, len(suggestions))
        top = suggestions[0]
        self.assertEqual("steamb", top.term)
        self.assertEqual(6, top.count)

    def test_lookup_should_find_exact_match(self):
        """TOP lookup of "streama" yields the single suggestion "steama"."""
        sym_spell = SymSpell()
        entries = {"steama": 4, "steamb": 6, "steamc": 2}
        for term, count in entries.items():
            sym_spell.create_dictionary_entry(term, count)

        suggestions = sym_spell.lookup("streama", Verbosity.TOP, 2)
        self.assertEqual(1, len(suggestions))
        self.assertEqual("steama", suggestions[0].term)

    def test_lookup_should_not_return_non_word_delete(self):
        """Deletes of "pawn" that are not words yield no suggestion."""
        sym_spell = SymSpell(2, 7, 10)
        sym_spell.create_dictionary_entry("pawn", 10)

        # Both queries are "pawn" with one character removed.
        for fragment in ("paw", "awn"):
            suggestions = sym_spell.lookup(fragment, Verbosity.TOP, 0)
            self.assertEqual(0, len(suggestions))

    def test_lookup_should_not_return_low_count_word(self):
        """A word added with count 1 under threshold 10 is not suggested."""
        sym_spell = SymSpell(2, 7, 10)
        sym_spell.create_dictionary_entry("pawn", 1)

        suggestions = sym_spell.lookup("pawn", Verbosity.TOP, 0)
        self.assertEqual(0, len(suggestions))

    def test_lookup_should_not_return_low_count_word_that_are_also_delete_word(self):
        """A low-count word that is also a delete of another entry is not
        suggested.
        """
        sym_spell = SymSpell(2, 7, 10)
        # "flam" has count 1 (threshold is 10) and is "flame" minus a letter.
        for term, count in (("flame", 20), ("flam", 1)):
            sym_spell.create_dictionary_entry(term, count)

        suggestions = sym_spell.lookup("flam", Verbosity.TOP, 0)
        self.assertEqual(0, len(suggestions))

    def test_lookup_max_edit_distance_too_large(self):
        """lookup raises ValueError when asked for distance 3 on an instance
        built with max dictionary edit distance 2.
        """
        sym_spell = SymSpell(2, 7, 10)
        for term, count in (("flame", 20), ("flam", 1)):
            sym_spell.create_dictionary_entry(term, count)

        with pytest.raises(ValueError) as raised:
            sym_spell.lookup("flam", Verbosity.TOP, 3)
        self.assertEqual("Distance too large", str(raised.value))

    def test_lookup_include_unknown(self):
        """With the fourth lookup argument True, "flam" itself is returned."""
        sym_spell = SymSpell(2, 7, 10)
        for term, count in (("flame", 20), ("flam", 1)):
            sym_spell.create_dictionary_entry(term, count)

        suggestions = sym_spell.lookup("flam", Verbosity.TOP, 0, True)
        self.assertEqual(1, len(suggestions))
        self.assertEqual("flam", suggestions[0].term)

    def test_lookup_avoid_exact_match_early_exit(self):
        """A term matching ignore_token is returned as the only suggestion."""
        edit_distance_max = 2
        sym_spell = SymSpell(edit_distance_max, 7, 10)
        for term, count in (("flame", 20), ("flam", 1)):
            sym_spell.create_dictionary_entry(term, count)

        suggestions = sym_spell.lookup(
            "24th", Verbosity.ALL, edit_distance_max,
            ignore_token=r"\d{2}\w*\b")
        self.assertEqual(1, len(suggestions))
        self.assertEqual("24th", suggestions[0].term)

    def test_load_bigram_dictionary_invalid_path(self):
        """load_bigram_dictionary returns False for a nonexistent path."""
        sym_spell = SymSpell(2, 7)
        loaded = sym_spell.load_bigram_dictionary(
            "invalid/dictionary/path.txt", 0, 2)
        self.assertEqual(False, loaded)

    def test_loading_dictionary_from_fileobject(self):
        """create_dictionary accepts an open text file and returns True."""
        sym_spell = SymSpell(2, 7)
        corpus_path = os.path.join(self.fortests_path, "big_words.txt")
        with open(corpus_path, "r", encoding="utf8") as corpus:
            created = sym_spell.create_dictionary(corpus)
        self.assertEqual(True, created)

    def test_load_bigram_dictionary_bad_dict(self):
        """Loading bad_dict.txt as bigrams succeeds with exactly 2 entries."""
        sym_spell = SymSpell(2, 7)
        bad_dict_path = os.path.join(self.fortests_path, "bad_dict.txt")

        loaded = sym_spell.load_bigram_dictionary(bad_dict_path, 0, 2)
        self.assertEqual(True, loaded)

        self.assertEqual(2, len(sym_spell.bigrams))
        expected_bigrams = (("rtyu tyui", 12), ("yuio uiop", 13))
        for bigram, count in expected_bigrams:
            self.assertEqual(count, sym_spell.bigrams[bigram])

    def test_load_bigram_dictionary_separator(self):
        """Bigrams load from a file whose columns are separated by "$"."""
        sym_spell = SymSpell(2, 7)
        separator_path = os.path.join(self.fortests_path,
                                      "separator_dict.txt")

        loaded = sym_spell.load_bigram_dictionary(separator_path, 0, 1, "$")
        self.assertEqual(True, loaded)

        self.assertEqual(5, len(sym_spell.bigrams))
        expected_bigrams = (
            ("the", 23135851162),
            ("of", 13151942776),
            ("abcs of", 10956800),
            ("aaron and", 10721728),
            ("and", 12997637966),
        )
        for key, count in expected_bigrams:
            self.assertEqual(count, sym_spell.bigrams[key])

    def test_load_dictionary_invalid_path(self):
        """load_dictionary returns False for a nonexistent path."""
        sym_spell = SymSpell(2, 7)
        loaded = sym_spell.load_dictionary(
            "invalid/dictionary/path.txt", 0, 1)
        self.assertEqual(False, loaded)

    def test_load_dictionary_bad_dictionary(self):
        """Loading bad_dict.txt as unigrams succeeds with exactly 2 words."""
        sym_spell = SymSpell(2, 7)
        bad_dict_path = os.path.join(self.fortests_path, "bad_dict.txt")

        loaded = sym_spell.load_dictionary(bad_dict_path, 0, 1)
        self.assertEqual(True, loaded)

        self.assertEqual(2, sym_spell.word_count)
        for word, count in (("asdf", 10), ("sdfg", 12)):
            self.assertEqual(count, sym_spell.words[word])

    def test_load_dictionary_separator(self):
        """Words load from a file whose columns are separated by "$"."""
        sym_spell = SymSpell(2, 7)
        separator_path = os.path.join(self.fortests_path,
                                      "separator_dict.txt")

        loaded = sym_spell.load_dictionary(separator_path, 0, 1, "$")
        self.assertEqual(True, loaded)

        self.assertEqual(5, sym_spell.word_count)
        expected_words = (
            ("the", 23135851162),
            ("of", 13151942776),
            ("abcs of", 10956800),
            ("aaron and", 10721728),
            ("and", 12997637966),
        )
        for key, count in expected_words:
            self.assertEqual(count, sym_spell.words[key])

    def test_lookup_should_replicate_noisy_results(self):
        """Sum the CLOSEST suggestion counts over the noisy query file.

        The first space-separated token of every line with at least two
        tokens in ``noisy_query_en_1000.txt`` is looked up, and the total
        number of suggestions returned must equal 4945.
        """
        query_path = os.path.join(self.fortests_path,
                                  "noisy_query_en_1000.txt")

        edit_distance_max = 2
        prefix_length = 7
        verbosity = Verbosity.CLOSEST
        sym_spell = SymSpell(edit_distance_max, prefix_length)
        sym_spell.load_dictionary(self.dictionary_path, 0, 1)

        test_list = []
        # An explicit encoding keeps the fixture read independent of the
        # platform's locale default; the file is iterated lazily instead of
        # being loaded whole with readlines().
        with open(query_path, "r", encoding="utf8") as infile:
            for line in infile:
                line_parts = line.rstrip().split(" ")
                if len(line_parts) >= 2:
                    test_list.append(line_parts[0])

        result_sum = sum(
            len(sym_spell.lookup(phrase, verbosity, edit_distance_max))
            for phrase in test_list)
        self.assertEqual(4945, result_sum)

    def test_lookup_compound(self):
        """lookup_compound with unigram and bigram dictionaries loaded.

        Every case must produce exactly one suggestion whose term, distance
        and count match the expected values.
        """
        edit_distance_max = 2
        sym_spell = SymSpell(edit_distance_max, 7)
        sym_spell.load_dictionary(self.dictionary_path, 0, 1)
        sym_spell.load_bigram_dictionary(self.bigram_path, 0, 2)

        # (typo, expected correction, expected distance, expected count)
        cases = (
            ("whereis th elove", "where is the love", 2, 585),
            ("the bigjest playrs", "the biggest players", 2, 34),
            ("Can yu readthis", "can you read this", 3, 11440),
            (("whereis th elove hehad dated forImuch of thepast who "
              "couqdn'tread in sixthgrade and ins pired him"),
             ("where is the love he had dated for much of the past "
              "who couldn't read in sixth grade and inspired him"),
             9, 0),
            ("in te dhird qarter oflast jear he hadlearned ofca sekretplan",
             ("in the third quarter of last year he had learned of a "
              "secret plan"),
             9, 0),
            (("the bigjest playrs in te strogsommer film slatew ith plety "
              "of funn"),
             ("the biggest players in the strong summer film slate "
              "with plenty of fun"),
             9, 0),
            ("Can yu readthis messa ge despite thehorible sppelingmsitakes",
             ("can you read this message despite the horrible "
              "spelling mistakes"),
             10, 0),
        )
        for typo, correction, distance, count in cases:
            suggestions = sym_spell.lookup_compound(typo, edit_distance_max)
            self.assertEqual(1, len(suggestions))
            best = suggestions[0]
            self.assertEqual(correction, best.term)
            self.assertEqual(distance, best.distance)
            self.assertEqual(count, best.count)

    def test_lookup_compound_no_bigram(self):
        """lookup_compound with only the unigram dictionary loaded.

        Every case must produce exactly one suggestion whose term, distance
        and count match the expected values.
        """
        edit_distance_max = 2
        sym_spell = SymSpell(edit_distance_max, 7)
        sym_spell.load_dictionary(self.dictionary_path, 0, 1)

        # (typo, expected correction, expected distance, expected count)
        cases = (
            ("whereis th elove", "whereas the love", 2, 64),
            ("the bigjest playrs", "the biggest players", 2, 34),
            ("Can yu readthis", "can you read this", 3, 3),
            (("whereis th elove hehad dated forImuch of thepast who "
              "couqdn'tread in sixthgrade and ins pired him"),
             ("whereas the love head dated for much of the past who "
              "couldn't read in sixth grade and inspired him"),
             9, 0),
            ("in te dhird qarter oflast jear he hadlearned ofca sekretplan",
             ("in the third quarter of last year he had learned of "
              "a secret plan"),
             9, 0),
            (("the bigjest playrs in te strogsommer film slatew ith plety "
              "of funn"),
             ("the biggest players in the strong summer film slate "
              "with plenty of fun"),
             9, 0),
            ("Can yu readthis messa ge despite thehorible sppelingmsitakes",
             ("can you read this message despite the horrible "
              "spelling mistakes"),
             10, 0),
        )
        for typo, correction, distance, count in cases:
            suggestions = sym_spell.lookup_compound(typo, edit_distance_max)
            self.assertEqual(1, len(suggestions))
            best = suggestions[0]
            self.assertEqual(correction, best.term)
            self.assertEqual(distance, best.distance)
            self.assertEqual(count, best.count)

    def test_lookup_compound_only_combi(self):
        """"ste am machie" is corrected to "steam machine"."""
        edit_distance_max = 2
        sym_spell = SymSpell(edit_distance_max, 7)
        for term in ("steam", "machine"):
            sym_spell.create_dictionary_entry(term, 1)

        suggestions = sym_spell.lookup_compound("ste am machie",
                                                edit_distance_max)
        self.assertEqual(1, len(suggestions))
        self.assertEqual("steam machine", suggestions[0].term)

    def test_lookup_compound_no_suggestion(self):
        """An uncorrectable phrase is returned unchanged as the only result."""
        edit_distance_max = 2
        sym_spell = SymSpell(edit_distance_max, 7)
        for term in ("steam", "machine"):
            sym_spell.create_dictionary_entry(term, 1)

        phrase = "qwer erty ytui a"
        suggestions = sym_spell.lookup_compound(phrase, edit_distance_max)
        self.assertEqual(1, len(suggestions))
        self.assertEqual(phrase, suggestions[0].term)

    def test_lookup_compound_replaced_words(self):
        """Check that lookup_compound records replacements cumulatively.

        Three phrases are corrected in turn on one SymSpell instance loaded
        with both the unigram and the bigram dictionary. After every call:

        * the size of ``replaced_words`` must equal the running total of
          expected replacements,
        * the single suggestion must equal the expected correction, and
        * each misspelled token must map to its expected replacement term.
        """
        edit_distance_max = 2
        prefix_length = 7
        sym_spell = SymSpell(edit_distance_max, prefix_length)
        sym_spell.load_dictionary(self.dictionary_path, 0, 1)
        sym_spell.load_bigram_dictionary(self.bigram_path, 0, 2)

        replacement_1 = {
            "whereis": "where is",
            "th": "the",
            "elove": "love",
            "hehad": "he had",
            "forimuch": "for much",
            "thepast": "the past",
            "couqdn'tread": "couldn't read",
            "sixthgrade": "sixth grade",
            "ins": "in"}
        replacement_2 = {
            "te": "the",
            "dhird": "third",
            "qarter": "quarter",
            "oflast": "of last",
            "jear": "year",
            "hadlearned": "had learned",
            "ofca": "of a",
            "sekretplan": "secret plan"}
        replacement_3 = {
            "bigjest": "biggest",
            "playrs": "players",
            "strogsommer": "strong summer",
            "slatew": "slate",
            "ith": "with",
            "plety": "plenty",
            "funn": "fun"}

        # (typo, expected correction, replacements newly recorded by the call)
        phases = (
            (("whereis th elove hehad dated forImuch of thepast who "
              "couqdn'tread in sixthgrade and ins pired him"),
             ("where is the love he had dated for much of the past "
              "who couldn't read in sixth grade and inspired him"),
             replacement_1),
            ("in te dhird qarter oflast jear he hadlearned ofca sekretplan",
             ("in the third quarter of last year he had learned of a "
              "secret plan"),
             replacement_2),
            (("the bigjest playrs in te strogsommer film slatew ith plety "
              "of funn"),
             ("the biggest players in the strong summer film slate "
              "with plenty of fun"),
             replacement_3),
        )

        expected_total = 0
        for typo, correction, replacements in phases:
            results = sym_spell.lookup_compound(typo, edit_distance_max)
            # replaced_words keeps growing across calls on the same instance.
            expected_total += len(replacements)
            self.assertEqual(expected_total, len(sym_spell.replaced_words))
            # Verify the returned correction in every phase, not only the
            # last one.
            self.assertEqual(1, len(results))
            self.assertEqual(correction, results[0].term)
            for k, v in replacements.items():
                self.assertEqual(v, sym_spell.replaced_words[k].term)

    def test_lookup_compound_replaced_words_no_bigram(self):
        """Check cumulative replacement recording without a bigram dictionary.

        Three phrases are corrected in turn on one SymSpell instance loaded
        with the unigram dictionary only. After every call:

        * the size of ``replaced_words`` must equal the running total of
          expected replacements,
        * the single suggestion must equal the expected correction, and
        * each misspelled token must map to its expected replacement term.
        """
        edit_distance_max = 2
        prefix_length = 7
        sym_spell = SymSpell(edit_distance_max, prefix_length)
        sym_spell.load_dictionary(self.dictionary_path, 0, 1)

        replacement_1 = {
            "whereis": "whereas",
            "th": "the",
            "elove": "love",
            "hehad": "head",
            "forimuch": "for much",
            "thepast": "the past",
            "couqdn'tread": "couldn't read",
            "sixthgrade": "sixth grade",
            "ins": "in"}
        replacement_2 = {
            "te": "the",
            "dhird": "third",
            "qarter": "quarter",
            "oflast": "of last",
            "jear": "year",
            "hadlearned": "had learned",
            "ofca": "of a",
            "sekretplan": "secret plan"}
        replacement_3 = {
            "bigjest": "biggest",
            "playrs": "players",
            "strogsommer": "strong summer",
            "slatew": "slate",
            "ith": "with",
            "plety": "plenty",
            "funn": "fun"}

        # (typo, expected correction, replacements newly recorded by the call)
        phases = (
            (("whereis th elove hehad dated forImuch of thepast who "
              "couqdn'tread in sixthgrade and ins pired him"),
             ("whereas the love head dated for much of the past who "
              "couldn't read in sixth grade and inspired him"),
             replacement_1),
            ("in te dhird qarter oflast jear he hadlearned ofca sekretplan",
             ("in the third quarter of last year he had learned of a "
              "secret plan"),
             replacement_2),
            (("the bigjest playrs in te strogsommer film slatew ith plety "
              "of funn"),
             ("the biggest players in the strong summer film slate "
              "with plenty of fun"),
             replacement_3),
        )

        expected_total = 0
        for typo, correction, replacements in phases:
            results = sym_spell.lookup_compound(typo, edit_distance_max)
            # replaced_words keeps growing across calls on the same instance.
            expected_total += len(replacements)
            self.assertEqual(expected_total, len(sym_spell.replaced_words))
            # Verify the returned correction in every phase, not only the
            # last one.
            self.assertEqual(1, len(results))
            self.assertEqual(correction, results[0].term)
            for k, v in replacements.items():
                self.assertEqual(v, sym_spell.replaced_words[k].term)

    def test_lookup_compound_ignore_non_words(self):
        """lookup_compound with its third argument True and bigrams loaded.

        Each expected correction keeps the numeric and upper-case tokens of
        its typo unchanged.
        """
        edit_distance_max = 2
        sym_spell = SymSpell(edit_distance_max, 7)
        sym_spell.load_dictionary(self.dictionary_path, 0, 1)
        sym_spell.load_bigram_dictionary(self.bigram_path, 0, 2)

        # (typo, expected correction)
        cases = (
            (("whereis th elove 123 hehad dated forImuch of THEPAST who "
              "couqdn'tread in SIXTHgrade and ins pired him"),
             ("where is the love 123 he had dated for much of THEPAST "
              "who couldn't read in sixth grade and inspired him")),
            ("in te DHIRD 1 qarter oflast jear he hadlearned ofca sekretplan",
             ("in the DHIRD 1 quarter of last year he had learned "
              "of a secret plan")),
            (("the bigjest playrs in te stroGSOmmer film slatew ith PLETY "
              "of 12 funn"),
             ("the biggest players in the strong summer film slate "
              "with PLETY of 12 fun")),
            (("Can yu readtHIS messa ge despite thehorible 1234 "
              "sppelingmsitakes"),
             ("can you read this message despite the horrible 1234 "
              "spelling mistakes")),
            (("Can yu readtHIS messa ge despite thehorible AB1234 "
              "sppelingmsitakes"),
             ("can you read this message despite the horrible AB1234 "
              "spelling mistakes")),
            ("PI on leave, arrange Co-I to do screening",
             "PI on leave arrange co i to do screening"),
        )
        for typo, correction in cases:
            suggestions = sym_spell.lookup_compound(typo, edit_distance_max,
                                                    True)
            self.assertEqual(1, len(suggestions))
            self.assertEqual(correction, suggestions[0].term)

    def test_lookup_compound_ignore_non_words_no_bigram(self):
        """lookup_compound with its third argument True and no bigrams.

        Each expected correction keeps the numeric and upper-case tokens of
        its typo unchanged.
        """
        edit_distance_max = 2
        sym_spell = SymSpell(edit_distance_max, 7)
        sym_spell.load_dictionary(self.dictionary_path, 0, 1)

        # (typo, expected correction)
        cases = (
            (("whereis th elove 123 hehad dated forImuch of THEPAST who "
              "couqdn'tread in SIXTHgrade and ins pired him"),
             ("whereas the love 123 head dated for much of THEPAST "
              "who couldn't read in sixth grade and inspired him")),
            ("in te DHIRD 1 qarter oflast jear he hadlearned ofca sekretplan",
             ("in the DHIRD 1 quarter of last year he had learned "
              "of a secret plan")),
            (("the bigjest playrs in te stroGSOmmer film slatew ith PLETY "
              "of 12 funn"),
             ("the biggest players in the strong summer film slate "
              "with PLETY of 12 fun")),
            (("Can yu readtHIS messa ge despite thehorible 1234 "
              "sppelingmsitakes"),
             ("can you read this message despite the horrible 1234 "
              "spelling mistakes")),
            (("Can yu readtHIS messa ge despite thehorible AB1234 "
              "sppelingmsitakes"),
             ("can you read this message despite the horrible AB1234 "
              "spelling mistakes")),
            ("PI on leave, arrange Co-I to do screening",
             "PI on leave arrange co i to do screening"),
        )
        for typo, correction in cases:
            suggestions = sym_spell.lookup_compound(typo, edit_distance_max,
                                                    True)
            self.assertEqual(1, len(suggestions))
            self.assertEqual(correction, suggestions[0].term)

    def test_load_dictionary_encoding(self):
        """Look up a Cyrillic term after loading a dictionary as UTF-8.

        Loads ``non_en_dict.txt`` from the test fixture directory with an
        explicit ``encoding="utf-8"`` and expects a TOP lookup to return
        exactly one suggestion.
        """
        max_distance = 2
        sym_spell = SymSpell(max_distance, 7)
        sym_spell.load_dictionary(
            os.path.join(self.fortests_path, "non_en_dict.txt"),
            0, 1, encoding="utf-8")

        suggestions = sym_spell.lookup("АБ", Verbosity.TOP, 2)
        self.assertEqual(1, len(suggestions))
        self.assertEqual("АБИ", suggestions[0].term)

    def test_word_segmentation(self):
        """Segment unspaced sentences using the default arguments.

        The first result is read through the ``corrected_string``
        attribute; the other two are read through index 1 of the result.
        """
        sym_spell = SymSpell(0, 7)
        sym_spell.load_dictionary(self.dictionary_path, 0, 1)

        segmented = sym_spell.word_segmentation(
            "thequickbrownfoxjumpsoverthelazydog")
        self.assertEqual("the quick brown fox jumps over the lazy dog",
                         segmented.corrected_string)

        # These cases read the corrected string positionally.
        indexed_cases = (
            ("itwasabrightcolddayinaprilandtheclockswerestrikingthirteen",
             "it was a bright cold day in april and the clocks "
             "were striking thirteen"),
            ("itwasthebestoftimesitwastheworstoftimesitwastheageofwisdom"
             "itwastheageoffoolishness",
             "it was the best of times it was the worst of times "
             "it was the age of wisdom it was the age of foolishness"),
        )
        for joined, expected in indexed_cases:
            segmented = sym_spell.word_segmentation(joined)
            self.assertEqual(expected, segmented[1])

    def test_word_segmentation_ignore_token(self):
        """Expect the input back unchanged when ``ignore_token`` is given.

        The pattern passed as ``ignore_token`` matches the leading "24th"
        token of the input.
        """
        sym_spell = SymSpell(2, 7)
        sym_spell.load_dictionary(self.dictionary_path, 0, 1)

        text = "24th december"
        segmented = sym_spell.word_segmentation(
            text, ignore_token=r"\d{2}\w*\b")
        self.assertEqual(text, segmented.corrected_string)

    def test_word_segmentation_with_arguments(self):
        """Segment unspaced sentences with explicit positional arguments.

        Every call passes the maximum edit distance (0) and the value 11
        after the input string. The last input starts with a space that
        does not appear in the expected output.
        """
        max_distance = 0
        sym_spell = SymSpell(max_distance, 7)
        sym_spell.load_dictionary(self.dictionary_path, 0, 1)

        cases = (
            ("thequickbrownfoxjumpsoverthelazydog",
             "the quick brown fox jumps over the lazy dog"),
            ("itwasabrightcolddayinaprilandtheclockswerestrikingthirteen",
             "it was a bright cold day in april and the clocks "
             "were striking thirteen"),
            (" itwasthebestoftimesitwastheworstoftimesitwastheageofwisdom"
             "itwastheageoffoolishness",
             "it was the best of times it was the worst of times "
             "it was the age of wisdom it was the age of foolishness"),
        )
        for joined, expected in cases:
            segmented = sym_spell.word_segmentation(joined, max_distance, 11)
            self.assertEqual(expected, segmented.corrected_string)

    def test_suggest_item(self):
        """Exercise ``SuggestItem`` equality, attribute access and ``str``.

        Two items that differ only in their term compare equal, while an
        item with different numeric fields does not.
        """
        first = SuggestItem("asdf", 12, 34)
        same_numbers = SuggestItem("sdfg", 12, 34)
        other_numbers = SuggestItem("dfgh", 56, 78)

        # Use ``==`` directly so the comparison operator itself is exercised.
        self.assertTrue(first == same_numbers)
        self.assertFalse(same_numbers == other_numbers)

        # ``term`` can be read and reassigned.
        self.assertEqual("asdf", first.term)
        first.term = "qwer"
        self.assertEqual("qwer", first.term)

        # ``count`` can be read and reassigned.
        self.assertEqual(34, first.count)
        first.count = 78
        self.assertEqual(78, first.count)

        # The string form reflects both reassigned values.
        self.assertEqual("qwer, 12, 78", str(first))

    def test_create_dictionary_invalid_path(self):
        """Expect ``create_dictionary`` to return False for a bad path."""
        sym_spell = SymSpell(2, 7)
        created = sym_spell.create_dictionary("invalid/dictionary/path.txt")
        self.assertEqual(False, created)

    def test_create_dictionary(self):
        """Build a dictionary from a corpus and compare it to a word list.

        ``big_modified.txt`` is fed to ``create_dictionary``; every
        ``"<word> <count>"`` line of ``big_words.txt`` must then match the
        count stored in ``sym_spell.words``, and the number of lines must
        equal ``sym_spell.word_count``.
        """
        corpus_path = os.path.join(self.fortests_path, "big_modified.txt")
        big_words_path = os.path.join(self.fortests_path, "big_words.txt")

        edit_distance_max = 2
        prefix_length = 7
        sym_spell = SymSpell(edit_distance_max, prefix_length)
        sym_spell.create_dictionary(corpus_path, encoding="utf-8")

        num_lines = 0
        # Read the expected list with the same explicit encoding as the
        # corpus so the test does not depend on the platform's locale.
        with open(big_words_path, "r", encoding="utf-8") as infile:
            for line in infile:
                key, count = line.rstrip().split(" ")
                self.assertEqual(int(count), sym_spell.words[key])
                num_lines += 1
        self.assertEqual(num_lines, sym_spell.word_count)

    def test_pickle_uncompressed(self):
        """Round-trip a loaded dictionary through an uncompressed pickle.

        Saves with ``is_compressed=False``, loads into a second instance
        and compares ``deletes``, ``words`` and ``_max_length``.
        """
        pickle_path = os.path.join(self.fortests_path, "dictionary.pickle")
        is_compressed = False
        edit_distance_max = 2
        prefix_length = 7
        sym_spell = SymSpell(edit_distance_max, prefix_length)
        sym_spell.load_dictionary(self.dictionary_path, 0, 1)

        try:
            sym_spell.save_pickle(pickle_path, is_compressed)

            sym_spell_2 = SymSpell(edit_distance_max, prefix_length)
            sym_spell_2.load_pickle(pickle_path, is_compressed)
            self.assertEqual(sym_spell.deletes, sym_spell_2.deletes)
            self.assertEqual(sym_spell.words, sym_spell_2.words)
            self.assertEqual(sym_spell._max_length, sym_spell_2._max_length)
        finally:
            # Clean up even when saving, loading or an assertion fails, so
            # no stale pickle is left in the fixture directory.
            if os.path.exists(pickle_path):
                os.remove(pickle_path)

    def test_pickle_compressed(self):
        """Round-trip a loaded dictionary through the default pickle format.

        Calls ``save_pickle`` and ``load_pickle`` without the compression
        argument, then compares ``deletes``, ``words`` and ``_max_length``
        between the two instances.
        """
        pickle_path = os.path.join(self.fortests_path, "dictionary.pickle")
        edit_distance_max = 2
        prefix_length = 7
        sym_spell = SymSpell(edit_distance_max, prefix_length)
        sym_spell.load_dictionary(self.dictionary_path, 0, 1)

        try:
            sym_spell.save_pickle(pickle_path)

            sym_spell_2 = SymSpell(edit_distance_max, prefix_length)
            sym_spell_2.load_pickle(pickle_path)
            self.assertEqual(sym_spell.deletes, sym_spell_2.deletes)
            self.assertEqual(sym_spell.words, sym_spell_2.words)
            self.assertEqual(sym_spell._max_length, sym_spell_2._max_length)
        finally:
            # Clean up even when saving, loading or an assertion fails, so
            # no stale pickle is left in the fixture directory.
            if os.path.exists(pickle_path):
                os.remove(pickle_path)

    def test_pickle_invalid(self):
        """Expect ``load_pickle`` to report failure for invalid payloads.

        Two hand-written pickles are tried in turn: one whose
        ``data_version`` is -1 and one with no ``data_version`` key. For
        both, ``load_pickle`` must return a falsy value.
        """
        pickle_path = os.path.join(self.fortests_path, "dictionary.pickle")
        is_compressed = False
        edit_distance_max = 2
        prefix_length = 7
        sym_spell = SymSpell(edit_distance_max, prefix_length)

        invalid_payloads = (
            {
                "deletes": {},
                "words": {},
                "max_length": 0,
                "data_version": -1
            },
            {
                "deletes": {},
                "words": {},
                "max_length": 0
            },
        )
        for pickle_data in invalid_payloads:
            try:
                with open(pickle_path, "wb") as f:
                    pickle.dump(pickle_data, f)
                self.assertFalse(
                    sym_spell.load_pickle(pickle_path, is_compressed))
            finally:
                # Remove the file after every payload, including on
                # failure, so no stale pickle is left behind.
                if os.path.exists(pickle_path):
                    os.remove(pickle_path)

    def test_delete_dictionary_entry(self):
        """Delete entries and check words, ``_max_length`` and lookups.

        After "steama" is deleted, the lookup for "steama" must fall back
        to "steem" and ``_max_length`` must shrink to ``len("steem")``.
        Deleting "stea" afterwards must leave both unchanged.
        """
        sym_spell = SymSpell()
        for word, count in (("stea", 1), ("steama", 2), ("steem", 3)):
            sym_spell.create_dictionary_entry(word, count)

        def assert_top_suggestion(expected_term):
            # The query is always "steama"; only the expected hit varies.
            suggestions = sym_spell.lookup("steama", Verbosity.TOP, 2)
            self.assertEqual(1, len(suggestions))
            self.assertEqual(expected_term, suggestions[0].term)

        assert_top_suggestion("steama")
        self.assertEqual(len("steama"), sym_spell._max_length)

        self.assertTrue(sym_spell.delete_dictionary_entry("steama"))
        self.assertNotIn("steama", sym_spell.words)
        self.assertEqual(len("steem"), sym_spell._max_length)
        assert_top_suggestion("steem")

        self.assertTrue(sym_spell.delete_dictionary_entry("stea"))
        self.assertNotIn("stea", sym_spell.words)
        self.assertEqual(len("steem"), sym_spell._max_length)
        assert_top_suggestion("steem")

    def test_delete_dictionary_entry_invalid_word(self):
        """Deleting a word that was never added must change nothing.

        ``delete_dictionary_entry("steamab")`` must return a falsy value,
        and the lookup result and ``_max_length`` must be the same before
        and after the call.
        """
        sym_spell = SymSpell()
        for word, count in (("stea", 1), ("steama", 2), ("steem", 3)):
            sym_spell.create_dictionary_entry(word, count)

        def assert_dictionary_untouched():
            suggestions = sym_spell.lookup("steama", Verbosity.TOP, 2)
            self.assertEqual(1, len(suggestions))
            self.assertEqual("steama", suggestions[0].term)
            self.assertEqual(len("steama"), sym_spell._max_length)

        assert_dictionary_untouched()
        self.assertFalse(sym_spell.delete_dictionary_entry("steamab"))
        assert_dictionary_untouched()

    def test_lookup_transfer_casing(self):
        """Check that ``lookup`` carries the input casing to the suggestion.

        Each case builds a fresh one-word dictionary, looks up a
        differently cased input with ``transfer_casing=True`` and compares
        the term of the first suggestion.
        """
        # (dictionary entry, looked-up input, expected suggestion term)
        cases = (
            ("steam", "Stream", "Steam"),
            ("steam", "StreaM", "SteaM"),
            ("steam", "STREAM", "STEAM"),
            ("i", "I", "I"),
        )
        for entry, typed, expected in cases:
            sym_spell = SymSpell()
            sym_spell.create_dictionary_entry(entry, 4)
            suggestions = sym_spell.lookup(typed, Verbosity.TOP, 2,
                                           transfer_casing=True)
            self.assertEqual(expected, suggestions[0].term)

    def test_lookup_compound_transfer_casing(self):
        """Compound lookup with casing transfer and a bigram dictionary.

        Both the unigram and the bigram dictionaries are loaded before the
        mixed-case sentence is corrected with ``transfer_casing=True``.
        """
        max_distance = 2
        sym_spell = SymSpell(max_distance, 7)
        sym_spell.load_dictionary(self.dictionary_path, 0, 1)
        sym_spell.load_bigram_dictionary(self.bigram_path, 0, 2)

        misspelled = ("Whereis th elove hehaD Dated forImuch of thepast who "
                      "couqdn'tread in sixthgrade AND ins pired him")
        expected = ("Where is the love he haD Dated for much of the past "
                    "who couldn't read in sixth grade AND inspired him")

        suggestions = sym_spell.lookup_compound(misspelled, max_distance,
                                                transfer_casing=True)
        self.assertEqual(expected, suggestions[0].term)

    def test_lookup_compound_transfer_casing_no_bigram(self):
        """Compound lookup with casing transfer and no bigram dictionary.

        Only the unigram dictionary is loaded; the expected correction
        starts with "Whereas the love heaD".
        """
        max_distance = 2
        sym_spell = SymSpell(max_distance, 7)
        sym_spell.load_dictionary(self.dictionary_path, 0, 1)

        misspelled = ("Whereis th elove hehaD Dated forImuch of thepast who "
                      "couqdn'tread in sixthgrade AND ins pired him")
        expected = ("Whereas the love heaD Dated for much of the past "
                    "who couldn't read in sixth grade AND inspired him")

        suggestions = sym_spell.lookup_compound(misspelled, max_distance,
                                                transfer_casing=True)
        self.assertEqual(expected, suggestions[0].term)

    def test_lookup_compound_transfer_casing_ignore_nonwords(self):
        """Compound lookup with casing transfer and ``ignore_non_words``.

        Unigram and bigram dictionaries are both loaded. The token
        "FOREEVER" in the input is expected to appear unchanged in the
        corrected sentence.
        """
        max_distance = 2
        sym_spell = SymSpell(max_distance, 7)
        sym_spell.load_dictionary(self.dictionary_path, 0, 1)
        sym_spell.load_bigram_dictionary(self.bigram_path, 0, 2)

        misspelled = ("Whereis th elove hehaD Dated FOREEVER forImuch of "
                      "thepast who couqdn'tread in sixthgrade AND ins pired "
                      "him")
        expected = ("Where is the love he haD Dated FOREEVER for much of the"
                    " past who couldn't read in sixth grade AND inspired "
                    "him")

        suggestions = sym_spell.lookup_compound(misspelled, max_distance,
                                                ignore_non_words=True,
                                                transfer_casing=True)
        self.assertEqual(expected, suggestions[0].term)

    def test_lookup_compound_transfer_casing_ignore_nonwords_no_bigram(self):
        """Casing transfer with ``ignore_non_words`` and no bigrams loaded.

        Only the unigram dictionary is loaded. The token "FOREEVER" is
        expected to appear unchanged, and the expected correction starts
        with "Whereas the love heaD".
        """
        max_distance = 2
        sym_spell = SymSpell(max_distance, 7)
        sym_spell.load_dictionary(self.dictionary_path, 0, 1)

        misspelled = ("Whereis th elove hehaD Dated FOREEVER forImuch of "
                      "thepast who couqdn'tread in sixthgrade AND ins pired "
                      "him")
        expected = ("Whereas the love heaD Dated FOREEVER for much of the"
                    " past who couldn't read in sixth grade AND inspired "
                    "him")

        suggestions = sym_spell.lookup_compound(misspelled, max_distance,
                                                ignore_non_words=True,
                                                transfer_casing=True)
        self.assertEqual(expected, suggestions[0].term)