Python nltk.metrics.distance.edit_distance() Examples

The following are 12 code examples of nltk.metrics.distance.edit_distance(), drawn from open-source projects. The project and source file for each example are noted above it. You may also want to check out the other functions and classes available in the nltk.metrics.distance module.
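Before the project examples, a minimal sketch of the call itself may help. nltk's edit_distance(s1, s2, substitution_cost=1, transpositions=False) returns the Levenshtein distance between two sequences; the strings below are purely illustrative.

from nltk.metrics.distance import edit_distance

print(edit_distance("abde", "abcde"))                            # 1 (one insertion)
print(edit_distance("theater", "theatre"))                       # 2 ("er" vs "re" as two substitutions)
print(edit_distance("theater", "theatre", transpositions=True))  # 1 (counted as one transposition)
print(edit_distance("ab", "ad", substitution_cost=2))            # 2 (substitutions now cost 2)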
Example #1
Source File: test_distance.py    From persephone with Apache License 2.0
import random

from nltk.metrics import distance

def seq_cases():
    """ Cases are of the form (reference, hypothesis, substitution_cost, dist).
    """
    hardcoded_seqs = [("", "", 1, 0),
                      ("ab", "ad", 1, 1),
                      ("abde", "abcde", 1, 1),
                      ([1,3,5], [], 1, 3),
                      ([1,3,5], [3], 1, 2),
                     ]

    # Here we assume the nltk.metrics.distance implementation is correct.
    generated_seqs = []
    for length in range(25):
        for _ in range(10):
            length2 = random.randint(0, int(length*1.5))
            # rand_str() is a helper defined elsewhere in this test module;
            # it is assumed to return a random string of the given length.
            s1 = rand_str(length)
            s2 = rand_str(length2)
            sub_cost = random.randint(0, 3)
            dist = distance.edit_distance(s1, s2, substitution_cost=sub_cost)
            generated_seqs.append((s1, s2, sub_cost, dist))

    return hardcoded_seqs + generated_seqs 
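The hardcoded cases above can be checked against nltk directly; note that edit_distance() accepts arbitrary sequences (here, lists of ints) as well as strings. A quick sanity check, assuming only nltk is installed:

from nltk.metrics.distance import edit_distance

assert edit_distance("", "") == 0
assert edit_distance("ab", "ad") == 1          # one substitution
assert edit_distance("abde", "abcde") == 1     # one insertion
assert edit_distance([1, 3, 5], []) == 3       # three deletions
assert edit_distance([1, 3, 5], [3]) == 2      # two deletions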
Example #2
Source File: decoder.py    From g-tensorflow-models with Apache License 2.0
def wer(self, decode, target):
    """Computes the Word Error Rate (WER).

    WER is defined as the edit distance between the two provided sentences after
    tokenizing to words.

    Args:
      decode: string of the decoded output.
      target: a string for the ground truth label.

    Returns:
      A float number for the WER of the current decode-target pair.
    """
    # Map each word to a new char.
    words = set(decode.split() + target.split())
    word2char = dict(zip(words, range(len(words))))

    new_decode = [chr(word2char[w]) for w in decode.split()]
    new_target = [chr(word2char[w]) for w in target.split()]

    return distance.edit_distance(''.join(new_decode), ''.join(new_target)) 
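The wer() method above reduces a word-level comparison to a character-level one by mapping every distinct word to a single character before calling edit_distance(); note that, as written, it returns the raw word-level edit distance rather than a rate normalized by the reference length. A standalone sketch of the mapping trick, with made-up sentences:

from nltk.metrics.distance import edit_distance

decode = "the cat sat on mat"
target = "the cat sat on the mat"

words = set(decode.split() + target.split())
word2char = dict(zip(words, range(len(words))))

new_decode = ''.join(chr(word2char[w]) for w in decode.split())
new_target = ''.join(chr(word2char[w]) for w in target.split())
print(edit_distance(new_decode, new_target))          # 1 (one word inserted)

# edit_distance() also accepts token lists directly, giving the same result:
print(edit_distance(decode.split(), target.split()))  # 1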
Example #3
Source File: decoder.py    From models with Apache License 2.0
def wer(self, decode, target):
    """Computes the Word Error Rate (WER).

    WER is defined as the edit distance between the two provided sentences after
    tokenizing to words.

    Args:
      decode: string of the decoded output.
      target: a string for the ground truth label.

    Returns:
      A float number for the WER of the current decode-target pair.
    """
    # Map each word to a new char.
    words = set(decode.split() + target.split())
    word2char = dict(zip(words, range(len(words))))

    new_decode = [chr(word2char[w]) for w in decode.split()]
    new_target = [chr(word2char[w]) for w in target.split()]

    return distance.edit_distance(''.join(new_decode), ''.join(new_target)) 
Example #4
Source File: decoder.py    From multilabel-image-classification-tensorflow with MIT License
def wer(self, decode, target):
    """Computes the Word Error Rate (WER).

    WER is defined as the edit distance between the two provided sentences after
    tokenizing to words.

    Args:
      decode: string of the decoded output.
      target: a string for the ground truth label.

    Returns:
      A float number for the WER of the current decode-target pair.
    """
    # Map each word to a new char.
    words = set(decode.split() + target.split())
    word2char = dict(zip(words, range(len(words))))

    new_decode = [chr(word2char[w]) for w in decode.split()]
    new_target = [chr(word2char[w]) for w in target.split()]

    return distance.edit_distance(''.join(new_decode), ''.join(new_target)) 
Example #5
Source File: methods_training_graph.py    From Sentence-Simplification-ACL14 with BSD 3-Clause "New" or "Revised" License
def process_rel_candidate_for_drop_led(relnode_candidate, filtered_mod_pos, nodeset, simple_sentences, main_sent_dict, boxer_graph, opr_drop_rel):
    simple_sentence = " ".join(simple_sentences)
    
    sentence_before_drop = boxer_graph.extract_main_sentence(nodeset, main_sent_dict, filtered_mod_pos)
    edit_dist_before_drop = edit_distance(sentence_before_drop.split(), simple_sentence.split())        
    
    temp_nodeset, temp_filtered_mod_pos = boxer_graph.drop_relation(nodeset, relnode_candidate, filtered_mod_pos)
    sentence_after_drop = boxer_graph.extract_main_sentence(temp_nodeset, main_sent_dict, temp_filtered_mod_pos)
    edit_dist_after_drop = edit_distance(sentence_after_drop.split(), simple_sentence.split())
    
    isDrop = compare_edit_distance(opr_drop_rel, edit_dist_after_drop, edit_dist_before_drop)
    return isDrop

# functions : Drop-MOD Candidate 
Example #6
Source File: methods_training_graph.py    From Sentence-Simplification-ACL14 with BSD 3-Clause "New" or "Revised" License
def process_mod_candidate_for_drop_led(modcand_to_process, filtered_mod_pos, nodeset, simple_sentences, main_sent_dict, boxer_graph, opr_drop_mod):
    simple_sentence = " ".join(simple_sentences)
    
    sentence_before_drop = boxer_graph.extract_main_sentence(nodeset, main_sent_dict, filtered_mod_pos)
    edit_dist_before_drop = edit_distance(sentence_before_drop.split(), simple_sentence.split())
    
    modcand_position_to_process = modcand_to_process[0]
    temp_filtered_mod_pos = filtered_mod_pos[:]+[modcand_position_to_process]
    sentence_after_drop = boxer_graph.extract_main_sentence(nodeset, main_sent_dict, temp_filtered_mod_pos)
    edit_dist_after_drop = edit_distance(sentence_after_drop.split(), simple_sentence.split())
    
    isDrop = compare_edit_distance(opr_drop_mod, edit_dist_after_drop, edit_dist_before_drop)
    return isDrop

# functions : Drop-OOD Candidate 
Example #7
Source File: methods_training_graph.py    From Sentence-Simplification-ACL14 with BSD 3-Clause "New" or "Revised" License
def process_ood_candidate_for_drop_led(oodnode_candidate, filtered_mod_pos, nodeset, simple_sentences, main_sent_dict, boxer_graph, opr_drop_ood):
    simple_sentence = " ".join(simple_sentences)
    
    sentence_before_drop = boxer_graph.extract_main_sentence(nodeset, main_sent_dict, filtered_mod_pos)
    edit_dist_before_drop = edit_distance(sentence_before_drop.split(), simple_sentence.split())
    
    temp_nodeset = nodeset[:]
    temp_nodeset.remove(oodnode_candidate)
    sentence_after_drop = boxer_graph.extract_main_sentence(temp_nodeset, main_sent_dict, filtered_mod_pos)
    edit_dist_after_drop = edit_distance(sentence_after_drop.split(), simple_sentence.split())

    isDrop = compare_edit_distance(opr_drop_ood, edit_dist_after_drop, edit_dist_before_drop)
    return isDrop 
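Examples #5 through #7 all follow one pattern: compute the word-level edit distance from the current main sentence to the target simple sentence, apply a candidate drop, recompute the distance, and let compare_edit_distance() (a helper not included in these snippets) decide whether to keep the drop. A minimal sketch of that before/after comparison, with a simple "drop if the distance does not grow" rule standing in for the real compare_edit_distance():

from nltk.metrics.distance import edit_distance

# Hypothetical sentences; the real ones come from boxer_graph.extract_main_sentence().
simple_sentence = "the cat sat"
sentence_before_drop = "the big cat sat on the mat"
sentence_after_drop = "the cat sat on the mat"

edit_dist_before_drop = edit_distance(sentence_before_drop.split(), simple_sentence.split())
edit_dist_after_drop = edit_distance(sentence_after_drop.split(), simple_sentence.split())

# Stand-in for compare_edit_distance(): keep the drop if it does not move the
# sentence further away from the simple sentence.
isDrop = edit_dist_after_drop <= edit_dist_before_drop
print(edit_dist_before_drop, edit_dist_after_drop, isDrop)  # 4 3 True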
Example #8
Source File: utils.py    From persephone with Apache License 2.0
from typing import Sequence, TypeVar

from nltk.metrics import distance

T = TypeVar("T")

def batch_per(hyps: Sequence[Sequence[T]],
              refs: Sequence[Sequence[T]]) -> float:
    """ Calculates the phoneme error rate of a batch."""

    macro_per = 0.0
    for i in range(len(hyps)):
        ref = [phn_i for phn_i in refs[i] if phn_i != 0]
        hyp = [phn_i for phn_i in hyps[i] if phn_i != 0]
        macro_per += distance.edit_distance(ref, hyp)/len(ref)
    return macro_per/len(hyps) 
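Given the function above, a small, made-up batch shows how it behaves: symbols equal to 0 are filtered out before each distance is computed, and the per-utterance error rates are averaged over the batch.

refs = [[1, 2, 3, 4], [5, 6, 0, 0]]
hyps = [[1, 2, 4, 0], [5, 7, 0, 0]]
# Utterance 1: edit_distance([1, 2, 3, 4], [1, 2, 4]) = 1, so 1/4 = 0.25
# Utterance 2: edit_distance([5, 6], [5, 7])          = 1, so 1/2 = 0.50
print(batch_per(hyps, refs))  # 0.375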
Example #9
Source File: decoder.py    From g-tensorflow-models with Apache License 2.0
def cer(self, decode, target):
    """Computes the Character Error Rate (CER).

    CER is defined as the edit distance between the two given strings.

    Args:
      decode: a string of the decoded output.
      target: a string for the ground truth label.

    Returns:
      A float number denoting the CER for the current sentence pair.
    """
    return distance.edit_distance(decode, target) 
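As with wer() above, cer() returns the raw character-level edit distance; the error rate is usually reported after normalizing by the length of the reference. A normalized variant, which is not part of the source above:

from nltk.metrics.distance import edit_distance

def char_error_rate(decode, target):
    # Hypothetical helper: raw edit distance divided by the reference length.
    return edit_distance(decode, target) / len(target)

print(char_error_rate("recognise", "recognize"))  # 0.111... (1 edit over 9 characters)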
Example #10
Source File: decoder.py    From models with Apache License 2.0
def cer(self, decode, target):
    """Computes the Character Error Rate (CER).

    CER is defined as the edit distance between the two given strings.

    Args:
      decode: a string of the decoded output.
      target: a string for the ground truth label.

    Returns:
      A float number denoting the CER for the current sentence pair.
    """
    return distance.edit_distance(decode, target) 
Example #11
Source File: matcher.py    From text-matcher with GNU General Public License v3.0
def edit_ratio(self, wordA, wordB):
        """ Computes the number of edits required to transform one
        (stemmed already, probably) word into another word, and
        adjusts for the average number of letters in each.

        Examples:
        color, colour: 0.1818181818
        theater, theatre: 0.2857
        day, today: 0.5
        foobar, foo56bar: 0.2857
        """
        # editDistance is assumed to be nltk's edit_distance, imported under
        # that name elsewhere in matcher.py (the import is not shown here).
        distance = editDistance(wordA, wordB)
        averageLength = (len(wordA) + len(wordB))/2
        return distance/averageLength 
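Assuming editDistance above is indeed nltk's edit_distance, the docstring values can be reproduced directly:

from nltk.metrics.distance import edit_distance

for a, b in [("color", "colour"), ("theater", "theatre"),
             ("day", "today"), ("foobar", "foo56bar")]:
    print(a, b, edit_distance(a, b) / ((len(a) + len(b)) / 2))
# color colour 0.18181818181818182
# theater theatre 0.2857142857142857
# day today 0.5
# foobar foo56bar 0.2857142857142857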
Example #12
Source File: decoder.py    From multilabel-image-classification-tensorflow with MIT License
def cer(self, decode, target):
    """Computes the Character Error Rate (CER).

    CER is defined as the edit distance between the two given strings.

    Args:
      decode: a string of the decoded output.
      target: a string for the ground truth label.

    Returns:
      A float number denoting the CER for the current sentence pair.
    """
    return distance.edit_distance(decode, target)