# Python nltk.compat.Fraction() Examples

The following are code examples showing how to use *nltk.compat.Fraction()*, taken from open source Python projects.
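All of the examples below turn on the same detail: `nltk.compat.Fraction` backports the `_normalize` keyword that `fractions.Fraction` only gained in Python 3.5, so passing `_normalize=False` preserves the raw numerator and denominator instead of reducing them by their greatest common divisor. BLEU-style metrics rely on this because the unreduced values are actual n-gram match counts. A minimal sketch, assuming an NLTK version that still ships `nltk.compat.Fraction`:

```python
from nltk.compat import Fraction

# With _normalize=False the raw counts survive: 6/12 is not reduced to 1/2.
p = Fraction(6, 12, _normalize=False)
print(p.numerator, p.denominator)  # -> 6 12

# The rational value itself is unchanged.
print(float(p))  # -> 0.5
```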

## Example 1

Project: OpenBottle | Author: xiaozhuchacha | File: bleu_score.py | License: MIT

```python
def method2(self, p_n, *args, **kwargs):
    """
    Smoothing method 2: Add 1 to both numerator and denominator, from
    Chin-Yew Lin and Franz Josef Och (2004), "Automatic evaluation of
    machine translation quality using longest common subsequence and
    skip-bigram statistics", ACL 2004.
    """
    return [Fraction(p_i.numerator + 1, p_i.denominator + 1, _normalize=False)
            for p_i in p_n]
```
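To see the smoothing outside the class, the same add-one transformation can be applied to a hand-built list of modified precisions; the counts below are invented for illustration:

```python
from nltk.compat import Fraction

# Hypothetical modified precisions for 1-, 2- and 3-grams; the zero
# numerator is exactly the case smoothing exists to rescue.
p_n = [Fraction(3, 6, _normalize=False),
       Fraction(1, 5, _normalize=False),
       Fraction(0, 4, _normalize=False)]

smoothed = [Fraction(p.numerator + 1, p.denominator + 1, _normalize=False)
            for p in p_n]
print([(p.numerator, p.denominator) for p in smoothed])
# -> [(4, 7), (2, 6), (1, 5)]
```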

## Example 2

Project: OpenBottle | Author: xiaozhuchacha | File: test_2x_compat.py | License: MIT

```python
def test_unnoramlize_fraction(self):
    from fractions import Fraction as NativePythonFraction
    from nltk.compat import Fraction as NLTKFraction

    # The native fraction should throw a TypeError in Python < 3.5
    with self.assertRaises(TypeError):
        NativePythonFraction(0, 1000, _normalize=False)

    # Using nltk.compat.Fraction in Python < 3.5
    compat_frac = NLTKFraction(0, 1000, _normalize=False)
    # The numerator and denominator do not change.
    assert compat_frac.numerator == 0
    assert compat_frac.denominator == 1000
    # The floating-point value remains normalized.
    assert float(compat_frac) == 0.0

    # Checks that the fraction is not reduced by the
    # greatest common divisor (gcd).
    six_twelve = NLTKFraction(6, 12, _normalize=False)
    assert six_twelve.numerator == 6
    assert six_twelve.denominator == 12

    one_two = NLTKFraction(1, 2, _normalize=False)
    assert one_two.numerator == 1
    assert one_two.denominator == 2

    # Checks against the native fraction.
    six_twelve_original = NativePythonFraction(6, 12)
    # Checks that the rational values of one_two and six_twelve are the same.
    assert float(one_two) == float(six_twelve) == float(six_twelve_original)

    # Checks that the fraction does get normalized, even when
    # _normalize == False, when the numerator is created using the
    # native fractions.Fraction.from_float
    assert NLTKFraction(3.142, _normalize=False) == NativePythonFraction(3.142)
```
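One caveat the test pins down: a float argument is converted through `fractions.Fraction.from_float`, which always reduces, so `_normalize=False` has no effect on float inputs. A short sketch under the same import assumptions:

```python
from fractions import Fraction as NativePythonFraction
from nltk.compat import Fraction as NLTKFraction

# Float inputs go through fractions.Fraction.from_float and come back
# normalized, _normalize=False notwithstanding.
assert NLTKFraction(3.142, _normalize=False) == NativePythonFraction(3.142)
```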

## Example 3

Project: OpenBottle | Author: xiaozhuchacha | File: bleu_score.py | License: MIT

## Example 4

Project: OpenBottle | Author: xiaozhuchacha | File: test_2x_compat.py | License: MIT

## Example 5

Project: Health-Checker | Author: KriAga | File: bleu_score.py | License: MIT

## Example 6

Project: Health-Checker | Author: KriAga | File: test_2x_compat.py | License: MIT

## Example 7

Project: NMT-RDPG | Author: MultiPath | File: bleu.py | License: MIT

## Example 8

Project: NQG | Author: magic282 | File: nltk_bleu_score.py | License: GPL v3.0

## Example 9

Project: seq2seq-keyphrase-pytorch | Author: memray | File: bleu_score(3.2).py | License: Apache 2.0

## Example 10

Project: honours_project | Author: JFriel | File: bleu_score.py | License: GPL v3.0

## Example 11

Project: honours_project | Author: JFriel | File: test_2x_compat.py | License: GPL v3.0

## Example 12

Project: honours_project | Author: JFriel | File: bleu_score.py | License: GPL v3.0

## Example 13

Project: honours_project | Author: JFriel | File: test_2x_compat.py | License: GPL v3.0

## Example 14

Project: aop-helpFinder | Author: jecarvaill | File: bleu_score.py | License: GPL v3.0

## Example 15

Project: aop-helpFinder | Author: jecarvaill | File: test_2x_compat.py | License: GPL v3.0

## Example 16

Project: serverless-chatbots-workshop | Author: datteswararao | File: bleu_score.py | License: Apache 2.0

## Example 17

Project: serverless-chatbots-workshop | Author: datteswararao | File: test_2x_compat.py | License: Apache 2.0

## Example 18

Project: serverless-chatbots-workshop | Author: datteswararao | File: bleu_score.py | License: Apache 2.0

## Example 19

Project: serverless-chatbots-workshop | Author: datteswararao | File: test_2x_compat.py | License: Apache 2.0

## Example 20

Project: dl4mt-nonauto | Author: nyu-dl | File: utils.py | License: BSD 3-Clause

```python
import math
from collections import Counter

from nltk.compat import Fraction

# modified_precision, closest_ref_length, brevity_penalty and
# SmoothingFunction are defined elsewhere in the same file.


def corpus_bleu(list_of_references, hypotheses,
                weights=(0.25, 0.25, 0.25, 0.25),
                smoothing_function=None, auto_reweigh=False,
                emulate_multibleu=False):
    p_numerators = Counter()    # Key = ngram order, value = no. of ngram matches.
    p_denominators = Counter()  # Key = ngram order, value = no. of ngrams in ref.
    hyp_lengths, ref_lengths = 0, 0

    if len(list_of_references) != len(hypotheses):
        print("The number of hypotheses and their reference(s) should be the same")
        return (0, (0, 0, 0, 0), 0, 0, 0)

    # Iterate through each hypothesis and their corresponding references.
    for references, hypothesis in zip(list_of_references, hypotheses):
        # For each order of ngram, calculate the numerator and
        # denominator for the corpus-level modified precision.
        for i, _ in enumerate(weights, start=1):
            p_i = modified_precision(references, hypothesis, i)
            p_numerators[i] += p_i.numerator
            p_denominators[i] += p_i.denominator

        # Calculate the hypothesis length and the closest reference length.
        # Adds them to the corpus-level hypothesis and reference counts.
        hyp_len = len(hypothesis)
        hyp_lengths += hyp_len
        ref_lengths += closest_ref_length(references, hyp_len)

    # Calculate corpus-level brevity penalty.
    bp = brevity_penalty(ref_lengths, hyp_lengths)

    # Uniformly re-weight based on the maximum hypothesis length if the
    # largest order of n-grams < 4 and weights are set at their default.
    if auto_reweigh:
        if hyp_lengths < 4 and weights == (0.25, 0.25, 0.25, 0.25):
            weights = (1 / hyp_lengths,) * hyp_lengths

    # Collects the various precision values for the different ngram orders.
    p_n = [Fraction(p_numerators[i], p_denominators[i], _normalize=False)
           for i, _ in enumerate(weights, start=1)]
    p_n_ = [xx.numerator / xx.denominator * 100 for xx in p_n]

    # Returns 0 if there are no matching n-grams.
    # We only need to check p_numerators[1] == 0, since if there are
    # no unigrams, there won't be any higher-order ngrams.
    if p_numerators[1] == 0:
        return (0, (0, 0, 0, 0), 0, 0, 0)

    # If there's no smoothing, use method0 from the SmoothingFunction class.
    if not smoothing_function:
        smoothing_function = SmoothingFunction().method0
    # Smooth the modified precision.
    # Note: smoothing_function() may convert values into floats;
    # it tries to retain the Fraction object as much as the
    # smoothing method allows.
    p_n = smoothing_function(p_n, references=references,
                             hypothesis=hypothesis, hyp_len=hyp_len,
                             emulate_multibleu=emulate_multibleu)
    s = (w * math.log(p_i) for i, (w, p_i) in enumerate(zip(weights, p_n)))
    s = bp * math.exp(math.fsum(s)) * 100
    final_bleu = round(s, 4) if emulate_multibleu else s
    return (final_bleu, p_n_, bp, ref_lengths, hyp_lengths)
```
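This variant returns a tuple of diagnostics rather than just the score. For comparison, the stock `nltk.translate.bleu_score.corpus_bleu` that it adapts can be exercised like this; the sentences are invented for illustration:

```python
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction

# One hypothesis with a single reference. Note the extra nesting:
# every hypothesis may have several references.
references = [[['the', 'cat', 'sat', 'on', 'the', 'mat']]]
hypotheses = [['the', 'cat', 'is', 'on', 'the', 'mat']]

score = corpus_bleu(references, hypotheses,
                    smoothing_function=SmoothingFunction().method2)
print(score)  # a float in [0, 1]
```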

## Example 21

Project: aop-helpFinder | Author: jecarvaill | File: nist_score.py | License: GPL v3.0

```python
import math
from collections import Counter

from nltk.compat import Fraction

# modified_precision, closest_ref_length and nist_length_penalty are
# defined elsewhere in the same file.


def corpus_nist(list_of_references, hypotheses, n=5):
    """
    Calculate a single corpus-level NIST score (aka. system-level NIST)
    for all the hypotheses and their respective references.

    :param references: a corpus of lists of reference sentences, w.r.t. hypotheses
    :type references: list(list(list(str)))
    :param hypotheses: a list of hypothesis sentences
    :type hypotheses: list(list(str))
    :param n: highest n-gram order
    :type n: int
    """
    # Before proceeding to compute NIST, perform sanity checks.
    assert len(list_of_references) == len(hypotheses), (
        "The number of hypotheses and their reference(s) should be the same"
    )

    p_numerators = Counter()       # Key = ngram order, value = no. of ngram matches.
    p_denominators = Counter()     # Key = ngram order, value = no. of ngrams in ref.
    sysoutput_lengths = Counter()  # Key = ngram order, value = no. of ngrams in hyp.
    hyp_lengths, ref_lengths = 0, 0

    # Iterate through each hypothesis and their corresponding references.
    for references, hypothesis in zip(list_of_references, hypotheses):
        # For each order of ngram, calculate the numerator and
        # denominator for the corpus-level modified precision.
        for i in range(1, n + 1):
            p_i = modified_precision(references, hypothesis, i)
            p_numerators[i] += p_i.numerator
            p_denominators[i] += p_i.denominator
            # Adds the no. of ngrams in the hypothesis.
            sysoutput_lengths[i] += len(hypothesis) - (i - 1)

        # Calculate the hypothesis length and the closest reference length.
        # Adds them to the corpus-level hypothesis and reference counts.
        hyp_len = len(hypothesis)
        hyp_lengths += hyp_len
        ref_lengths += closest_ref_length(references, hyp_len)

    # Calculate the corpus-level length penalty.
    bp = nist_length_penalty(ref_lengths, hyp_lengths)

    # Collects the various precision values for the different ngram orders.
    p_n = [Fraction(p_numerators[i], p_denominators[i], _normalize=False)
           for i in range(1, n + 1)]

    # Eqn 2 in Doddington (2002):
    # Info(w_1 ... w_n) = log_2 [ (# of occurrences of w_1 ... w_n-1) /
    #                             (# of occurrences of w_1 ... w_n) ]
    info = [
        0 if p_n[i].numerator == 0 or p_n[i + 1].numerator == 0
        # Handles math domain and zero division errors.
        else math.log(p_n[i].numerator / p_n[i + 1].numerator)
        for i in range(len(p_n) - 1)
    ]
    return sum(info_i / sysoutput_lengths[i]
               for i, info_i in enumerate(info, start=1)) * bp
```
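The stock counterpart, `nltk.translate.nist_score.corpus_nist`, takes the same doubly nested reference structure; a small usage sketch with invented sentences:

```python
from nltk.translate.nist_score import corpus_nist

references = [[['the', 'cat', 'sat', 'on', 'the', 'mat']]]
hypotheses = [['the', 'cat', 'is', 'on', 'the', 'mat']]

# n is the highest n-gram order scored.
print(corpus_nist(references, hypotheses, n=2))
```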