Python nltk.compat.Fraction() Examples

The following are code examples for showing how to use nltk.compat.Fraction(). They are from open source Python projects. You can vote up the examples you like or vote down the ones you don't like.

Example 1
Project: OpenBottle   Author: xiaozhuchacha   File: bleu_score.py    MIT License 5 votes vote down vote up
def method2(self, p_n, *args, **kwargs):
        """
        Smoothing method 2: add 1 to both the numerator and the denominator
        of each modified precision, following Chin-Yew Lin and Franz Josef
        Och (2004), "Automatic evaluation of machine translation quality
        using longest common subsequence and skip-bigram statistics", ACL04.

        :param p_n: modified precisions, one Fraction per n-gram order
        :return: list of add-one-smoothed, unnormalized Fractions
        """
        smoothed = []
        for precision in p_n:
            smoothed.append(Fraction(precision.numerator + 1,
                                     precision.denominator + 1,
                                     _normalize=False))
        return smoothed
Example 2
Project: OpenBottle   Author: xiaozhuchacha   File: test_2x_compat.py    MIT License 5 votes vote down vote up
def test_unnoramlize_fraction(self):
        """nltk.compat.Fraction must keep unnormalized num/den on Python < 3.5."""
        from fractions import Fraction as NativePythonFraction
        from nltk.compat import Fraction as NLTKFraction

        # On Python < 3.5 the stdlib Fraction rejects the private
        # _normalize keyword with a TypeError.
        with self.assertRaises(TypeError):
            NativePythonFraction(0, 1000, _normalize=False)

        # nltk.compat.Fraction accepts _normalize and leaves the
        # numerator and denominator untouched...
        frac = NLTKFraction(0, 1000, _normalize=False)
        assert frac.numerator == 0
        assert frac.denominator == 1000
        # ...while the floating point value is still the reduced ratio.
        assert float(frac) == 0.0

        # With _normalize=False the fraction is not divided through by
        # the greatest common divisor (gcd).
        unreduced = NLTKFraction(6, 12, _normalize=False)
        assert unreduced.numerator == 6
        assert unreduced.denominator == 12

        reduced = NLTKFraction(1, 2, _normalize=False)
        assert reduced.numerator == 1
        assert reduced.denominator == 2

        # Both forms still denote the same rational value as the
        # native, normalized fraction.
        native = NativePythonFraction(6, 12)
        assert float(reduced) == float(unreduced) == float(native)

        # A fraction built from a float is normalized even when
        # _normalize=False, because the numerator goes through the native
        # fractions.Fraction.from_float.
        assert NLTKFraction(3.142, _normalize=False) == NativePythonFraction(3.142)
Example 3
Project: OpenBottle   Author: xiaozhuchacha   File: bleu_score.py    MIT License 5 votes vote down vote up
def method2(self, p_n, *args, **kwargs):
        """
        Smoothing method 2: add 1 to both the numerator and the denominator
        of each modified precision, following Chin-Yew Lin and Franz Josef
        Och (2004), "Automatic evaluation of machine translation quality
        using longest common subsequence and skip-bigram statistics", ACL04.

        :param p_n: modified precisions, one Fraction per n-gram order
        :return: list of add-one-smoothed, unnormalized Fractions
        """
        smoothed = []
        for precision in p_n:
            smoothed.append(Fraction(precision.numerator + 1,
                                     precision.denominator + 1,
                                     _normalize=False))
        return smoothed
Example 4
Project: OpenBottle   Author: xiaozhuchacha   File: test_2x_compat.py    MIT License 5 votes vote down vote up
def test_unnoramlize_fraction(self):
        """nltk.compat.Fraction must keep unnormalized num/den on Python < 3.5."""
        from fractions import Fraction as NativePythonFraction
        from nltk.compat import Fraction as NLTKFraction

        # On Python < 3.5 the stdlib Fraction rejects the private
        # _normalize keyword with a TypeError.
        with self.assertRaises(TypeError):
            NativePythonFraction(0, 1000, _normalize=False)

        # nltk.compat.Fraction accepts _normalize and leaves the
        # numerator and denominator untouched...
        frac = NLTKFraction(0, 1000, _normalize=False)
        assert frac.numerator == 0
        assert frac.denominator == 1000
        # ...while the floating point value is still the reduced ratio.
        assert float(frac) == 0.0

        # With _normalize=False the fraction is not divided through by
        # the greatest common divisor (gcd).
        unreduced = NLTKFraction(6, 12, _normalize=False)
        assert unreduced.numerator == 6
        assert unreduced.denominator == 12

        reduced = NLTKFraction(1, 2, _normalize=False)
        assert reduced.numerator == 1
        assert reduced.denominator == 2

        # Both forms still denote the same rational value as the
        # native, normalized fraction.
        native = NativePythonFraction(6, 12)
        assert float(reduced) == float(unreduced) == float(native)

        # A fraction built from a float is normalized even when
        # _normalize=False, because the numerator goes through the native
        # fractions.Fraction.from_float.
        assert NLTKFraction(3.142, _normalize=False) == NativePythonFraction(3.142)
Example 5
Project: Health-Checker   Author: KriAga   File: bleu_score.py    MIT License 5 votes vote down vote up
def method2(self, p_n, *args, **kwargs):
        """
        Smoothing method 2: add 1 to both the numerator and the denominator
        of each modified precision, following Chin-Yew Lin and Franz Josef
        Och (2004), "Automatic evaluation of machine translation quality
        using longest common subsequence and skip-bigram statistics", ACL04.

        :param p_n: modified precisions, one Fraction per n-gram order
        :return: list of add-one-smoothed, unnormalized Fractions
        """
        smoothed = []
        for precision in p_n:
            smoothed.append(Fraction(precision.numerator + 1,
                                     precision.denominator + 1,
                                     _normalize=False))
        return smoothed
Example 6
Project: Health-Checker   Author: KriAga   File: test_2x_compat.py    MIT License 5 votes vote down vote up
def test_unnoramlize_fraction(self):
        """nltk.compat.Fraction must keep unnormalized num/den on Python < 3.5."""
        from fractions import Fraction as NativePythonFraction
        from nltk.compat import Fraction as NLTKFraction

        # On Python < 3.5 the stdlib Fraction rejects the private
        # _normalize keyword with a TypeError.
        with self.assertRaises(TypeError):
            NativePythonFraction(0, 1000, _normalize=False)

        # nltk.compat.Fraction accepts _normalize and leaves the
        # numerator and denominator untouched...
        frac = NLTKFraction(0, 1000, _normalize=False)
        assert frac.numerator == 0
        assert frac.denominator == 1000
        # ...while the floating point value is still the reduced ratio.
        assert float(frac) == 0.0

        # With _normalize=False the fraction is not divided through by
        # the greatest common divisor (gcd).
        unreduced = NLTKFraction(6, 12, _normalize=False)
        assert unreduced.numerator == 6
        assert unreduced.denominator == 12

        reduced = NLTKFraction(1, 2, _normalize=False)
        assert reduced.numerator == 1
        assert reduced.denominator == 2

        # Both forms still denote the same rational value as the
        # native, normalized fraction.
        native = NativePythonFraction(6, 12)
        assert float(reduced) == float(unreduced) == float(native)

        # A fraction built from a float is normalized even when
        # _normalize=False, because the numerator goes through the native
        # fractions.Fraction.from_float.
        assert NLTKFraction(3.142, _normalize=False) == NativePythonFraction(3.142)
Example 7
Project: NMT-RDPG   Author: MultiPath   File: bleu.py    MIT License 5 votes vote down vote up
def method2(self, p_n, *args, **kwargs):
        """
        Smoothing method 2: add 1 to both the numerator and the denominator
        of each modified precision, following Chin-Yew Lin and Franz Josef
        Och (2004), "Automatic evaluation of machine translation quality
        using longest common subsequence and skip-bigram statistics", ACL04.

        :param p_n: modified precisions, one Fraction per n-gram order
        :return: list of add-one-smoothed, unnormalized Fractions
        """
        smoothed = []
        for precision in p_n:
            smoothed.append(Fraction(precision.numerator + 1,
                                     precision.denominator + 1,
                                     _normalize=False))
        return smoothed
Example 8
Project: NQG   Author: magic282   File: nltk_bleu_score.py    GNU General Public License v3.0 5 votes vote down vote up
def method2(self, p_n, *args, **kwargs):
        """
        Smoothing method 2: add 1 to both the numerator and the denominator
        of each modified precision, following Chin-Yew Lin and Franz Josef
        Och (2004), "Automatic evaluation of machine translation quality
        using longest common subsequence and skip-bigram statistics", ACL04.

        :param p_n: modified precisions, one Fraction per n-gram order
        :return: list of add-one-smoothed, unnormalized Fractions
        """
        smoothed = []
        for precision in p_n:
            smoothed.append(Fraction(precision.numerator + 1,
                                     precision.denominator + 1,
                                     _normalize=False))
        return smoothed
Example 9
Project: seq2seq-keyphrase-pytorch   Author: memray   File: bleu_score(3.2).py    Apache License 2.0 5 votes vote down vote up
def method2(self, p_n, *args, **kwargs):
        """
        Smoothing method 2: add 1 to both the numerator and the denominator
        of each modified precision, following Chin-Yew Lin and Franz Josef
        Och (2004), "Automatic evaluation of machine translation quality
        using longest common subsequence and skip-bigram statistics", ACL04.

        :param p_n: modified precisions, one Fraction per n-gram order
        :return: list of add-one-smoothed, unnormalized Fractions
        """
        smoothed = []
        for precision in p_n:
            smoothed.append(Fraction(precision.numerator + 1,
                                     precision.denominator + 1,
                                     _normalize=False))
        return smoothed
Example 10
Project: honours_project   Author: JFriel   File: bleu_score.py    GNU General Public License v3.0 5 votes vote down vote up
def method2(self, p_n, *args, **kwargs):
        """
        Smoothing method 2: add 1 to both the numerator and the denominator
        of each modified precision, following Chin-Yew Lin and Franz Josef
        Och (2004), "Automatic evaluation of machine translation quality
        using longest common subsequence and skip-bigram statistics", ACL04.

        :param p_n: modified precisions, one Fraction per n-gram order
        :return: list of add-one-smoothed, unnormalized Fractions
        """
        smoothed = []
        for precision in p_n:
            smoothed.append(Fraction(precision.numerator + 1,
                                     precision.denominator + 1,
                                     _normalize=False))
        return smoothed
Example 11
Project: honours_project   Author: JFriel   File: test_2x_compat.py    GNU General Public License v3.0 5 votes vote down vote up
def test_unnoramlize_fraction(self):
        """nltk.compat.Fraction must keep unnormalized num/den on Python < 3.5."""
        from fractions import Fraction as NativePythonFraction
        from nltk.compat import Fraction as NLTKFraction

        # On Python < 3.5 the stdlib Fraction rejects the private
        # _normalize keyword with a TypeError.
        with self.assertRaises(TypeError):
            NativePythonFraction(0, 1000, _normalize=False)

        # nltk.compat.Fraction accepts _normalize and leaves the
        # numerator and denominator untouched...
        frac = NLTKFraction(0, 1000, _normalize=False)
        assert frac.numerator == 0
        assert frac.denominator == 1000
        # ...while the floating point value is still the reduced ratio.
        assert float(frac) == 0.0

        # With _normalize=False the fraction is not divided through by
        # the greatest common divisor (gcd).
        unreduced = NLTKFraction(6, 12, _normalize=False)
        assert unreduced.numerator == 6
        assert unreduced.denominator == 12

        reduced = NLTKFraction(1, 2, _normalize=False)
        assert reduced.numerator == 1
        assert reduced.denominator == 2

        # Both forms still denote the same rational value as the
        # native, normalized fraction.
        native = NativePythonFraction(6, 12)
        assert float(reduced) == float(unreduced) == float(native)

        # A fraction built from a float is normalized even when
        # _normalize=False, because the numerator goes through the native
        # fractions.Fraction.from_float.
        assert NLTKFraction(3.142, _normalize=False) == NativePythonFraction(3.142)
Example 12
Project: honours_project   Author: JFriel   File: bleu_score.py    GNU General Public License v3.0 5 votes vote down vote up
def method2(self, p_n, *args, **kwargs):
        """
        Smoothing method 2: add 1 to both the numerator and the denominator
        of each modified precision, following Chin-Yew Lin and Franz Josef
        Och (2004), "Automatic evaluation of machine translation quality
        using longest common subsequence and skip-bigram statistics", ACL04.

        :param p_n: modified precisions, one Fraction per n-gram order
        :return: list of add-one-smoothed, unnormalized Fractions
        """
        smoothed = []
        for precision in p_n:
            smoothed.append(Fraction(precision.numerator + 1,
                                     precision.denominator + 1,
                                     _normalize=False))
        return smoothed
Example 13
Project: honours_project   Author: JFriel   File: test_2x_compat.py    GNU General Public License v3.0 5 votes vote down vote up
def test_unnoramlize_fraction(self):
        """nltk.compat.Fraction must keep unnormalized num/den on Python < 3.5."""
        from fractions import Fraction as NativePythonFraction
        from nltk.compat import Fraction as NLTKFraction

        # On Python < 3.5 the stdlib Fraction rejects the private
        # _normalize keyword with a TypeError.
        with self.assertRaises(TypeError):
            NativePythonFraction(0, 1000, _normalize=False)

        # nltk.compat.Fraction accepts _normalize and leaves the
        # numerator and denominator untouched...
        frac = NLTKFraction(0, 1000, _normalize=False)
        assert frac.numerator == 0
        assert frac.denominator == 1000
        # ...while the floating point value is still the reduced ratio.
        assert float(frac) == 0.0

        # With _normalize=False the fraction is not divided through by
        # the greatest common divisor (gcd).
        unreduced = NLTKFraction(6, 12, _normalize=False)
        assert unreduced.numerator == 6
        assert unreduced.denominator == 12

        reduced = NLTKFraction(1, 2, _normalize=False)
        assert reduced.numerator == 1
        assert reduced.denominator == 2

        # Both forms still denote the same rational value as the
        # native, normalized fraction.
        native = NativePythonFraction(6, 12)
        assert float(reduced) == float(unreduced) == float(native)

        # A fraction built from a float is normalized even when
        # _normalize=False, because the numerator goes through the native
        # fractions.Fraction.from_float.
        assert NLTKFraction(3.142, _normalize=False) == NativePythonFraction(3.142)
Example 14
Project: aop-helpFinder   Author: jecarvaill   File: bleu_score.py    GNU General Public License v3.0 5 votes vote down vote up
def method2(self, p_n, *args, **kwargs):
        """
        Smoothing method 2: add 1 to both the numerator and the denominator
        of each modified precision, following Chin-Yew Lin and Franz Josef
        Och (2004), "Automatic evaluation of machine translation quality
        using longest common subsequence and skip-bigram statistics", ACL04.

        :param p_n: modified precisions, one Fraction per n-gram order
        :return: list of add-one-smoothed, unnormalized Fractions
        """
        smoothed = []
        for precision in p_n:
            smoothed.append(Fraction(precision.numerator + 1,
                                     precision.denominator + 1,
                                     _normalize=False))
        return smoothed
Example 15
Project: aop-helpFinder   Author: jecarvaill   File: test_2x_compat.py    GNU General Public License v3.0 5 votes vote down vote up
def test_unnoramlize_fraction(self):
        """nltk.compat.Fraction must keep unnormalized num/den on Python < 3.5."""
        from fractions import Fraction as NativePythonFraction
        from nltk.compat import Fraction as NLTKFraction

        # On Python < 3.5 the stdlib Fraction rejects the private
        # _normalize keyword with a TypeError.
        with self.assertRaises(TypeError):
            NativePythonFraction(0, 1000, _normalize=False)

        # nltk.compat.Fraction accepts _normalize and leaves the
        # numerator and denominator untouched...
        frac = NLTKFraction(0, 1000, _normalize=False)
        assert frac.numerator == 0
        assert frac.denominator == 1000
        # ...while the floating point value is still the reduced ratio.
        assert float(frac) == 0.0

        # With _normalize=False the fraction is not divided through by
        # the greatest common divisor (gcd).
        unreduced = NLTKFraction(6, 12, _normalize=False)
        assert unreduced.numerator == 6
        assert unreduced.denominator == 12

        reduced = NLTKFraction(1, 2, _normalize=False)
        assert reduced.numerator == 1
        assert reduced.denominator == 2

        # Both forms still denote the same rational value as the
        # native, normalized fraction.
        native = NativePythonFraction(6, 12)
        assert float(reduced) == float(unreduced) == float(native)

        # A fraction built from a float is normalized even when
        # _normalize=False, because the numerator goes through the native
        # fractions.Fraction.from_float.
        assert NLTKFraction(3.142, _normalize=False) == NativePythonFraction(3.142)
Example 16
Project: serverless-chatbots-workshop   Author: datteswararao   File: bleu_score.py    Apache License 2.0 5 votes vote down vote up
def method2(self, p_n, *args, **kwargs):
        """
        Smoothing method 2: add 1 to both the numerator and the denominator
        of each modified precision, following Chin-Yew Lin and Franz Josef
        Och (2004), "Automatic evaluation of machine translation quality
        using longest common subsequence and skip-bigram statistics", ACL04.

        :param p_n: modified precisions, one Fraction per n-gram order
        :return: list of add-one-smoothed, unnormalized Fractions
        """
        smoothed = []
        for precision in p_n:
            smoothed.append(Fraction(precision.numerator + 1,
                                     precision.denominator + 1,
                                     _normalize=False))
        return smoothed
Example 17
Project: serverless-chatbots-workshop   Author: datteswararao   File: test_2x_compat.py    Apache License 2.0 5 votes vote down vote up
def test_unnoramlize_fraction(self):
        """nltk.compat.Fraction must keep unnormalized num/den on Python < 3.5."""
        from fractions import Fraction as NativePythonFraction
        from nltk.compat import Fraction as NLTKFraction

        # On Python < 3.5 the stdlib Fraction rejects the private
        # _normalize keyword with a TypeError.
        with self.assertRaises(TypeError):
            NativePythonFraction(0, 1000, _normalize=False)

        # nltk.compat.Fraction accepts _normalize and leaves the
        # numerator and denominator untouched...
        frac = NLTKFraction(0, 1000, _normalize=False)
        assert frac.numerator == 0
        assert frac.denominator == 1000
        # ...while the floating point value is still the reduced ratio.
        assert float(frac) == 0.0

        # With _normalize=False the fraction is not divided through by
        # the greatest common divisor (gcd).
        unreduced = NLTKFraction(6, 12, _normalize=False)
        assert unreduced.numerator == 6
        assert unreduced.denominator == 12

        reduced = NLTKFraction(1, 2, _normalize=False)
        assert reduced.numerator == 1
        assert reduced.denominator == 2

        # Both forms still denote the same rational value as the
        # native, normalized fraction.
        native = NativePythonFraction(6, 12)
        assert float(reduced) == float(unreduced) == float(native)

        # A fraction built from a float is normalized even when
        # _normalize=False, because the numerator goes through the native
        # fractions.Fraction.from_float.
        assert NLTKFraction(3.142, _normalize=False) == NativePythonFraction(3.142)
Example 18
Project: serverless-chatbots-workshop   Author: datteswararao   File: bleu_score.py    Apache License 2.0 5 votes vote down vote up
def method2(self, p_n, *args, **kwargs):
        """
        Smoothing method 2: add 1 to both the numerator and the denominator
        of each modified precision, following Chin-Yew Lin and Franz Josef
        Och (2004), "Automatic evaluation of machine translation quality
        using longest common subsequence and skip-bigram statistics", ACL04.

        :param p_n: modified precisions, one Fraction per n-gram order
        :return: list of add-one-smoothed, unnormalized Fractions
        """
        smoothed = []
        for precision in p_n:
            smoothed.append(Fraction(precision.numerator + 1,
                                     precision.denominator + 1,
                                     _normalize=False))
        return smoothed
Example 19
Project: serverless-chatbots-workshop   Author: datteswararao   File: test_2x_compat.py    Apache License 2.0 5 votes vote down vote up
def test_unnoramlize_fraction(self):
        """nltk.compat.Fraction must keep unnormalized num/den on Python < 3.5."""
        from fractions import Fraction as NativePythonFraction
        from nltk.compat import Fraction as NLTKFraction

        # On Python < 3.5 the stdlib Fraction rejects the private
        # _normalize keyword with a TypeError.
        with self.assertRaises(TypeError):
            NativePythonFraction(0, 1000, _normalize=False)

        # nltk.compat.Fraction accepts _normalize and leaves the
        # numerator and denominator untouched...
        frac = NLTKFraction(0, 1000, _normalize=False)
        assert frac.numerator == 0
        assert frac.denominator == 1000
        # ...while the floating point value is still the reduced ratio.
        assert float(frac) == 0.0

        # With _normalize=False the fraction is not divided through by
        # the greatest common divisor (gcd).
        unreduced = NLTKFraction(6, 12, _normalize=False)
        assert unreduced.numerator == 6
        assert unreduced.denominator == 12

        reduced = NLTKFraction(1, 2, _normalize=False)
        assert reduced.numerator == 1
        assert reduced.denominator == 2

        # Both forms still denote the same rational value as the
        # native, normalized fraction.
        native = NativePythonFraction(6, 12)
        assert float(reduced) == float(unreduced) == float(native)

        # A fraction built from a float is normalized even when
        # _normalize=False, because the numerator goes through the native
        # fractions.Fraction.from_float.
        assert NLTKFraction(3.142, _normalize=False) == NativePythonFraction(3.142)
Example 20
Project: dl4mt-nonauto   Author: nyu-dl   File: utils.py    BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def corpus_bleu(list_of_references, hypotheses, weights=(0.25, 0.25, 0.25, 0.25),
                smoothing_function=None, auto_reweigh=False,
                emulate_multibleu=False):
    """
    Calculate a corpus-level BLEU score by accumulating the modified
    n-gram precision numerators/denominators over all hypothesis-reference
    pairs before combining them.

    :param list_of_references: a corpus of lists of reference sentences
    :param hypotheses: a list of hypothesis sentences (same length)
    :param weights: n-gram order weights (default: uniform up to 4-grams)
    :param smoothing_function: optional SmoothingFunction method
    :param auto_reweigh: re-weight uniformly when the corpus is shorter
        than the highest n-gram order
    :param emulate_multibleu: round the final score to 4 decimals
    :return: tuple ``(bleu, p_n_, bp, ref_lengths, hyp_lengths)`` where
        ``p_n_`` are the unsmoothed modified precisions in percent.
    """
    p_numerators = Counter()    # Key = ngram order, value = no. of ngram matches.
    p_denominators = Counter()  # Key = ngram order, value = no. of ngrams in ref.
    hyp_lengths, ref_lengths = 0, 0

    if len(list_of_references) != len(hypotheses):
        print("The number of hypotheses and their reference(s) should be the same")
        return (0, (0, 0, 0, 0), 0, 0, 0)

    # Iterate through each hypothesis and their corresponding references.
    for references, hypothesis in zip(list_of_references, hypotheses):
        # For each order of ngram, calculate the numerator and
        # denominator for the corpus-level modified precision.
        for i, _ in enumerate(weights, start=1):
            p_i = modified_precision(references, hypothesis, i)
            p_numerators[i] += p_i.numerator
            p_denominators[i] += p_i.denominator

        # Calculate the hypothesis length and the closest reference length.
        # Adds them to the corpus-level hypothesis and reference counts.
        hyp_len = len(hypothesis)
        hyp_lengths += hyp_len
        ref_lengths += closest_ref_length(references, hyp_len)

    # Returns 0 if there are no matching n-grams.  We only need to check
    # p_numerators[1]: without unigram matches there are no higher-order
    # matches either.  Doing this *before* the brevity penalty and the
    # auto-reweighting also avoids a ZeroDivisionError when
    # auto_reweigh=True and every hypothesis is empty (hyp_lengths == 0).
    if p_numerators[1] == 0:
        return (0, (0, 0, 0, 0), 0, 0, 0)

    # Calculate corpus-level brevity penalty.
    bp = brevity_penalty(ref_lengths, hyp_lengths)

    # Uniformly re-weighting based on maximum hypothesis lengths if largest
    # order of n-grams < 4 and weights is set at default.
    if auto_reweigh:
        if hyp_lengths < 4 and weights == (0.25, 0.25, 0.25, 0.25):
            weights = (1 / hyp_lengths,) * hyp_lengths

    # Collects the various precision values for the different ngram orders.
    p_n = [Fraction(p_numerators[i], p_denominators[i], _normalize=False)
           for i, _ in enumerate(weights, start=1)]

    # Unsmoothed modified precisions in percent, reported alongside the score.
    p_n_ = [xx.numerator / xx.denominator * 100 for xx in p_n]

    # If there's no smoothing, use method0 from the SmoothingFunction class.
    if not smoothing_function:
        smoothing_function = SmoothingFunction().method0
    # Smoothen the modified precision.
    # Note: smoothing_function() may convert values into floats;
    #       it tries to retain the Fraction object as much as the
    #       smoothing method allows.  `references`/`hypothesis`/`hyp_len`
    #       here are the *last* pair from the loop above, as in upstream NLTK.
    p_n = smoothing_function(p_n, references=references, hypothesis=hypothesis,
                             hyp_len=hyp_len, emulate_multibleu=emulate_multibleu)
    # Geometric mean of the smoothed precisions, scaled by the brevity penalty.
    s = (w * math.log(p_i) for w, p_i in zip(weights, p_n))
    s = bp * math.exp(math.fsum(s)) * 100
    final_bleu = round(s, 4) if emulate_multibleu else s
    return (final_bleu, p_n_, bp, ref_lengths, hyp_lengths)
Example 21
Project: aop-helpFinder   Author: jecarvaill   File: nist_score.py    GNU General Public License v3.0 4 votes vote down vote up
def corpus_nist(list_of_references, hypotheses, n=5):
    """
    Calculate a single corpus-level NIST score (aka. system-level BLEU) for all
    the hypotheses and their respective references.

    :param references: a corpus of lists of reference sentences, w.r.t. hypotheses
    :type references: list(list(list(str)))
    :param hypotheses: a list of hypothesis sentences
    :type hypotheses: list(list(str))
    :param n: highest n-gram order
    :type n: int
    """
    # Before proceeding to compute NIST, perform sanity checks.
    assert len(list_of_references) == len(hypotheses), "The number of hypotheses and their reference(s) should be the same"

    p_numerators = Counter() # Key = ngram order, and value = no. of ngram matches.
    p_denominators = Counter() # Key = ngram order, and value = no. of ngram in ref.
    sysoutput_lengths = Counter() # Key = ngram order, and value = no. of ngram in hyp.
    hyp_lengths, ref_lengths = 0, 0

    # Iterate through each hypothesis and their corresponding references.
    for references, hypothesis in zip(list_of_references, hypotheses):
        # For each order of ngram, calculate the numerator and
        # denominator for the corpus-level modified precision.
        # NOTE(review): enumerate() starts at 0 here, so modified_precision()
        # receives ngram orders 0..n-1 rather than the 1..n that
        # `range(1, n+1)` suggests — this looks like a missing `start=1`
        # (compare corpus_bleu, which uses `enumerate(weights, start=1)`).
        # Confirm against modified_precision() before relying on this.
        for i, _ in enumerate(range(1,n+1)):
            p_i = modified_precision(references, hypothesis, i)
            p_numerators[i] += p_i.numerator
            p_denominators[i] += p_i.denominator
            # Adds the no. of ngrams in the hypothesis.
            sysoutput_lengths[i] += len(hypothesis) - (i - 1)

        # Calculate the hypothesis length and the closest reference length.
        # Adds them to the corpus-level hypothesis and reference counts.
        hyp_len =  len(hypothesis)
        hyp_lengths += hyp_len
        ref_lengths += closest_ref_length(references, hyp_len)

    # Calculate corpus-level brevity penalty.
    bp = nist_length_penalty(ref_lengths, hyp_lengths)

    # Collects the various precision values for the different ngram orders.
    # (Same 0-based ordering caveat as in the loop above.)
    p_n = [Fraction(p_numerators[i], p_denominators[i], _normalize=False)
           for i, _ in enumerate(range(1,n+1))]

    # Eqn 2 in Doddington (2002):
    # Info(w_1 ... w_n) = log_2 [ (# of occurrences of w_1 ... w_n-1) / (# of occurrences of w_1 ... w_n) ]
    # NOTE(review): the code uses math.log (natural log), while the cited
    # Eqn 2 specifies log base 2 — confirm which base is intended.
    # A zero numerator at either order is mapped to 0 information to avoid
    # math-domain and zero-division errors.
    info = [0 if p_n[i].numerator == 0 or p_n[i+1].numerator == 0 # Handles math domain and zero division errors.
            else math.log(p_n[i].numerator / p_n[i+1].numerator)
            for i in range(len(p_n)-1)]
    # Information gain per order, normalized by the hypothesis ngram count
    # for that order, summed and scaled by the NIST length penalty.
    return sum(info_i/sysoutput_lengths[i] for i, info_i in enumerate(info)) * bp