Python nltk.translate.bleu_score.sentence_bleu() Examples

The following are 30 code examples of nltk.translate.bleu_score.sentence_bleu(), collected from open source projects. Each example lists the source file and the project it was taken from.
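Before the project-specific examples, here is a minimal, self-contained sketch (invented sentences, not taken from any of the projects below) of the call pattern the snippets share: sentence_bleu() takes a list of tokenized references first, then a single tokenized hypothesis, plus optional n-gram weights and a smoothing_function.

from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

# References: a list of token lists (multiple references are allowed).
references = ["the cat sat on the mat".split(),
              "a cat was sitting on the mat".split()]
# Hypothesis: a single token list.
hypothesis = "the cat sat on a mat".split()

# The default weights (0.25, 0.25, 0.25, 0.25) give cumulative BLEU-4;
# method1 smoothing avoids a zero score when a higher-order n-gram is missing.
score = sentence_bleu(references, hypothesis,
                      weights=(0.25, 0.25, 0.25, 0.25),
                      smoothing_function=SmoothingFunction().method1)
print(round(score, 4))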
Example #1
Source File: metrics.py    From ParlAI with MIT License
def compute(guess: str, answers: List[str], k: int = 4) -> Optional['BleuMetric']:
        """
        Compute approximate BLEU score between guess and a set of answers.
        """
        if nltkbleu is None:
            # bleu library not installed, just return a default value
            return None
        # Warning: BLEU calculation *should* include proper tokenization and
        # punctuation etc. We're using the normalize_answer for everything though,
        # so we're over-estimating our BLEU scores.  Also note that NLTK's bleu is
        # going to be slower than fairseq's (which is written in C), but fairseq's
        # requires that everything be in arrays of ints (i.e. as tensors). NLTK's
        # works with strings, which is better suited for this module.
        weights = [1 / k for _ in range(k)]
        score = nltkbleu.sentence_bleu(
            [normalize_answer(a).split(" ") for a in answers],
            normalize_answer(guess).split(" "),
            smoothing_function=nltkbleu.SmoothingFunction(epsilon=1e-12).method1,
            weights=weights,
        )
        return BleuMetric(score) 
Example #2
Source File: model.py    From DeepNews with Apache License 2.0
def blue_score_text(self,y_actual,y_predicated):
        #check length equal
        assert len(y_actual) ==  len(y_predicated)
        #list of headlines .. each headline is a list of words
        no_of_news = len(y_actual)
        blue_score = 0.0
        for i in range(no_of_news):
            reference = y_actual[i]
            hypothesis = y_predicated[i]
            
            #Avoid ZeroDivisionError in BLEU score
            #default weights
            weights=(0.25, 0.25, 0.25, 0.25)
            min_len_present = min(len(reference),len(hypothesis))
            if min_len_present==0:
                continue
            if min_len_present<4:
                weights=[1.0/min_len_present,]*min_len_present
            
            blue_score = blue_score + sentence_bleu([reference],hypothesis,weights=weights)
        
        return blue_score/float(no_of_news) 
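The weight-shortening step above can be seen in isolation. The toy snippet below (invented sentences, not from the DeepNews project) shows why cumulative BLEU-4 on a two-token hypothesis collapses towards zero, while uniform weights over the n-gram orders that actually exist do not:

from nltk.translate.bleu_score import sentence_bleu

reference = "good morning".split()
hypothesis = "good morning".split()

# Default BLEU-4 warns and returns ~0: the sentences contain no 3-grams or 4-grams.
print(sentence_bleu([reference], hypothesis))

# Uniform weights over the orders that are present give the expected score of 1.0.
n = min(len(reference), len(hypothesis))
print(sentence_bleu([reference], hypothesis, weights=[1.0 / n] * n))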
Example #3
Source File: metrics.py    From deepAPI with MIT License
def sim_bleu(self, hyps, ref):
        """
        :param ref - a list of tokens of the reference
        :param hyps - a list of hypotheses, each a list of tokens
    
        :return maxbleu - recall bleu
        :return avgbleu - precision bleu
        """
        scores = []
        for hyp in hyps:
            try:
               # scores.append(sentence_bleu([ref], hyp, smoothing_function=SmoothingFunction().method7,
               #                         weights=[1./4, 1./4, 1./4, 1./4]))
                scores.append(smoothed_bleu(list(bleu_stats(hyp, ref))))
            except:
                scores.append(0.0)
        return np.max(scores), np.mean(scores) 
Example #4
Source File: utils.py    From quick-nlp with MIT License
def print_batch(learner: Learner, modeldata: ModelData, input_field, output_field, num_batches=1, num_sentences=-1,
                is_test=False, num_beams=1, weights=None, smoothing_function=None):
    predictions, targets, inputs = learner.predict_with_targs_and_inputs(is_test=is_test, num_beams=num_beams)
    weights = (1 / 3., 1 / 3., 1 / 3.) if weights is None else weights
    smoothing_function = SmoothingFunction().method1 if smoothing_function is None else smoothing_function
    blue_scores = []
    for batch_num, (input, target, prediction) in enumerate(zip(inputs, targets, predictions)):
        inputs_str: BatchBeamTokens = modeldata.itos(input, input_field)
        predictions_str: BatchBeamTokens = modeldata.itos(prediction, output_field)
        targets_str: BatchBeamTokens = modeldata.itos(target, output_field)
        for index, (inp, targ, pred) in enumerate(zip(inputs_str, targets_str, predictions_str)):
            blue_score = sentence_bleu([targ], pred, smoothing_function=smoothing_function, weights=weights)
            print(
                f'batch: {batch_num} sample : {index}\ninput: {" ".join(inp)}\ntarget: { " ".join(targ)}\nprediction: {" ".join(pred)}\nbleu: {blue_score}\n\n')
            blue_scores.append(blue_score)
            if 0 < num_sentences <= index - 1:
                break
        if 0 < num_batches <= batch_num - 1:
            break
    print(f'mean bleu score: {np.mean(blue_scores)}') 
Example #5
Source File: precision_recall.py    From cotk with Apache License 2.0
def _score(self, gen: List[int], reference: List[int]) -> float:
		'''Return a sentence-level BLEU score in [0, 1], used to compute BLEU-ngram precision and recall.

		Arguments:
			gen (list): list of generated word ids.
			reference (list): list of word ids of a reference.

		Here is an Example:

			>>> gen = [4,5]
			>>> reference = [5,6]
			>>> self._score(gen, reference)
			0.150 # assume self.weights = [0.25,0.25,0.25,0.25]
		'''
		gen = self._replace_unk(gen)
		return sentence_bleu([reference], gen, self.weights, SmoothingFunction().method1) 
Example #6
Source File: test_bleu.py    From cotk with Apache License 2.0
def get_bleu(self, dataloader, input, reference_key, gen_key):
		refs = []
		gens = []
		for gen_sen, resp_sen in zip(input[gen_key], input[reference_key]):
			gen_sen_processed = dataloader.trim_in_ids(gen_sen)
			resp_sen_processed = dataloader.trim_in_ids(resp_sen[1:])
			refs.append(resp_sen_processed)
			gens.append(gen_sen_processed)
		gens = replace_unk(gens)
		bleu_irl_bw, bleu_irl_fw = [], []
		for i in range(len(gens)):
			bleu_irl_fw.append(sentence_bleu(refs, gens[i], smoothing_function=SmoothingFunction().method1))
		for i in range(len(refs)):
			bleu_irl_bw.append(sentence_bleu(gens, refs[i], smoothing_function=SmoothingFunction().method1))

		fw_bleu = (1.0 * sum(bleu_irl_fw) / len(bleu_irl_fw))
		bw_bleu = (1.0 * sum(bleu_irl_bw) / len(bleu_irl_bw))
		return 2.0 * bw_bleu * fw_bleu / (fw_bleu + bw_bleu) 
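The forward/backward combination above is an F-measure-style score: forward BLEU rates each generated sentence against all references (precision-like), backward BLEU rates each reference against all generated sentences (recall-like), and the two averages are merged with a harmonic mean. A toy, self-contained sketch of the same idea, without the cotk dataloader, looks like this:

from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

refs = ["the cat sat on the mat".split(), "dogs are playing outside".split()]
gens = ["a cat sat on the mat".split(), "the dogs play outside".split()]
smooth = SmoothingFunction().method1

fw = [sentence_bleu(refs, g, smoothing_function=smooth) for g in gens]  # precision-like
bw = [sentence_bleu(gens, r, smoothing_function=smooth) for r in refs]  # recall-like

fw_bleu = sum(fw) / len(fw)
bw_bleu = sum(bw) / len(bw)
print(2.0 * fw_bleu * bw_bleu / (fw_bleu + bw_bleu))  # harmonic mean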
Example #7
Source File: utils.py    From lang2program with Apache License 2.0
def bleu(reference, predict):
    """Compute sentence-level bleu score.

    Args:
        reference (list[str])
        predict (list[str])
    """
    from nltk.translate import bleu_score

    if len(predict) == 0:
        if len(reference) == 0:
            return 1.0
        else:
            return 0.0

    # TODO(kelvin): is this quite right?
    # use a maximum of 4-grams. If 4-grams aren't present, use only lower n-grams.
    n = min(4, len(reference), len(predict))
    weights = tuple([1. / n] * n)  # uniform weight on n-gram precisions
    return bleu_score.sentence_bleu([reference], predict, weights) 
Example #8
Source File: utils.py    From lang2program with Apache License 2.0
def bleu(reference, predict):
    """Compute sentence-level bleu score.

    Args:
        reference (list[str])
        predict (list[str])
    """
    from nltk.translate import bleu_score

    if len(predict) == 0:
        if len(reference) == 0:
            return 1.0
        else:
            return 0.0

    # TODO(kelvin): is this quite right?
    # use a maximum of 4-grams. If 4-grams aren't present, use only lower n-grams.
    n = min(4, len(reference), len(predict))
    weights = tuple([1. / n] * n)  # uniform weight on n-gram precisions
    return bleu_score.sentence_bleu([reference], predict, weights) 
Example #9
Source File: metric.py    From MultiTurnDialogZoo with MIT License
def cal_BLEU_nltk(refer, candidate, ngram=1):
    '''
    SmoothingFunction refer to https://github.com/PaddlePaddle/models/blob/a72760dff8574fe2cb8b803e01b44624db3f3eff/PaddleNLP/Research/IJCAI2019-MMPMS/mmpms/utils/metrics.py
    '''
    smoothie = SmoothingFunction().method7
    if ngram == 1:
        weight = (1, 0, 0, 0)
    elif ngram == 2:
        weight = (0.5, 0.5, 0, 0)
    elif ngram == 3:
        weight = (0.33, 0.33, 0.33, 0)
    elif ngram == 4:
        weight = (0.25, 0.25, 0.25, 0.25)
    return sentence_bleu(refer, candidate, 
                         weights=weight, 
                         smoothing_function=smoothie)

# BLEU of nlg-eval 
Example #10
Source File: bleu_metrics.py    From dialog-eval with MIT License
def update_metrics(self, resp, gt, source):
    '''
    Params:
      :resp: Response word list.
      :gt: Ground truth word list.
      :source: Source word list.
    '''
    try:
      self.metrics['bleu-1'].append(
        bleu_score.sentence_bleu([gt], resp, weights=(1, 0, 0, 0),
                                 smoothing_function=self.smoothing))
      self.metrics['bleu-2'].append(
        bleu_score.sentence_bleu([gt], resp, weights=(0.5, 0.5, 0, 0),
                                 smoothing_function=self.smoothing))
      self.metrics['bleu-3'].append(
        bleu_score.sentence_bleu([gt], resp, weights=(0.33, 0.33, 0.33, 0),
                                 smoothing_function=self.smoothing))
      self.metrics['bleu-4'].append(
        bleu_score.sentence_bleu([gt], resp, weights=(0.25, 0.25, 0.25, 0.25),
                                 smoothing_function=self.smoothing))
    except (KeyError, ZeroDivisionError):
      self.metrics['bleu-1'].append(0)
      self.metrics['bleu-2'].append(0)
      self.metrics['bleu-3'].append(0)
      self.metrics['bleu-4'].append(0) 
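The four weight tuples above are the usual way to get cumulative BLEU-1 through BLEU-4 out of sentence_bleu(): uniform weights over the first n orders and zeros for the rest. A compact, self-contained equivalent (toy word lists, not from dialog-eval) is:

from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

gt = "the weather is nice today".split()
resp = "the weather looks nice".split()
smooth = SmoothingFunction().method1

# Cumulative BLEU-n: uniform weights on the first n orders, zeros elsewhere.
for n in range(1, 5):
    weights = tuple(1.0 / n if i < n else 0.0 for i in range(4))
    print(f"bleu-{n}:", sentence_bleu([gt], resp, weights=weights,
                                      smoothing_function=smooth))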
Example #11
Source File: metrics.py    From KBRD with MIT License
def _bleu(guess, answers):
    """Compute approximate BLEU score between guess and a set of answers."""
    if nltkbleu is None:
        # bleu library not installed, just return a default value
        return None
    # Warning: BLEU calculation *should* include proper tokenization and
    # punctuation etc. We're using the normalize_answer for everything though,
    # so we're over-estimating our BLEU scores.  Also note that NLTK's bleu is
    # going to be slower than fairseq's (which is written in C), but fairseq's
    # requires that everything be in arrays of ints (i.e. as tensors). NLTK's
    # works with strings, which is better suited for this module.
    return nltkbleu.sentence_bleu(
        [normalize_answer(a).split(" ") for a in answers],
        normalize_answer(guess).split(" "),
        smoothing_function=nltkbleu.SmoothingFunction(epsilon=1e-12).method1,
    ) 
Example #12
Source File: metrics.py    From neural_chat with MIT License
def _bleu(guess, answers):
    """Compute approximate BLEU score between guess and a set of answers."""
    if nltkbleu is None:
        # bleu library not installed, just return a default value
        return None
    # Warning: BLEU calculation *should* include proper tokenization and
    # punctuation etc. We're using the normalize_answer for everything though,
    # so we're over-estimating our BLEU scores.  Also note that NLTK's bleu is
    # going to be slower than fairseq's (which is written in C), but fairseq's
    # requires that everything be in arrays of ints (i.e. as tensors). NLTK's
    # works with strings, which is better suited for this module.
    return nltkbleu.sentence_bleu(
        [normalize_answer(a).split(" ") for a in answers],
        normalize_answer(guess).split(" "),
        smoothing_function=nltkbleu.SmoothingFunction(epsilon=1e-12).method1,
    ) 
Example #13
Source File: bleu.py    From dialogbot with Apache License 2.0
def bleu(answer_file, standard_answer_file):
    rf_answer = open(answer_file, 'r', encoding="utf-8")
    rf_standard_answer = open(standard_answer_file, 'r', encoding="utf-8")
    answer_lines = rf_answer.readlines()
    standard_answer_lines = rf_standard_answer.readlines()
    # compute score
    scores = []
    for i in range(len(answer_lines)):
        candidate = list(answer_lines[i].strip())
        each_score = 0
        for j in range(10):
            references = []
            standard_answer_line = standard_answer_lines[i * 11 + j].strip().split('\t')
            references.append(list(standard_answer_line[0].strip()))
            standard_score = standard_answer_line[1]
            bleu_score = sentence_bleu(references, candidate, weights=(0.35, 0.45, 0.1, 0.1),
                                       smoothing_function=SmoothingFunction().method1)
            each_score = bleu_score * float(standard_score) + each_score
        scores.append(each_score / 10)
    rf_answer.close()
    rf_standard_answer.close()
    score_final = sum(scores) / float(len(answer_lines))
    precision_score = round(score_final, 6)
    return precision_score 
Example #14
Source File: evaluator.py    From tranX with Apache License 2.0
def get_sentence_bleu(self, example, hyp):
        return sentence_bleu([tokenize_for_bleu_eval(example.meta['example_dict']['snippet'])],
                             tokenize_for_bleu_eval(hyp.decanonical_code),
                             smoothing_function=SmoothingFunction().method3) 
Example #15
Source File: utils.py    From Deep-Reinforcement-Learning-Hands-On with MIT License
def calc_bleu_many(cand_seq, ref_sequences):
    sf = bleu_score.SmoothingFunction()
    return bleu_score.sentence_bleu(ref_sequences, cand_seq,
                                    smoothing_function=sf.method1,
                                    weights=(0.5, 0.5)) 
Example #16
Source File: test_bleu.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International
def test_reference_or_hypothesis_shorter_than_fourgrams(self):
        # Test case where the length of reference or hypothesis
        # is shorter than 4.
        references = ['let it go'.split()]
        hypothesis = 'let go it'.split()
        # Checks that the score returned for this hypothesis and reference is 0.0
        # exp(w_1 * 1 * w_2 * 1 * w_3 * 1 * w_4 * -inf) = 0
        self.assertAlmostEqual(sentence_bleu(references, hypothesis), 0.0, places=4)
        # Checks that the warning has been raised.
        try:
            self.assertWarns(UserWarning, sentence_bleu, references, hypothesis)
        except AttributeError:
            pass  # unittest.TestCase.assertWarns is only supported in Python >= 3.2. 
Example #17
Source File: test_bleu.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International
def test_empty_references_and_hypothesis(self):
        # Test case where both the references and the hypothesis are empty.
        references = [[]]
        hypothesis = []
        assert sentence_bleu(references, hypothesis) == 0 
Example #18
Source File: test_bleu.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International
def test_empty_hypothesis(self):
        # Test case where the hypothesis is empty.
        references = ['The candidate has no alignment to any of the references'.split()]
        hypothesis = []
        assert sentence_bleu(references, hypothesis) == 0 
Example #19
Source File: test_bleu.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International
def test_case_where_n_is_bigger_than_hypothesis_length(self):
        # Test BLEU to nth order of n-grams, where n > len(hypothesis).
        references = ['John loves Mary ?'.split()]
        hypothesis = 'John loves Mary'.split()
        n = len(hypothesis) + 1  #
        weights = [1.0 / n] * n  # Uniform weights.
        # Since no n-gram matches were found, the result should be zero
        # exp(w_1 * 1 * w_2 * 1 * w_3 * 1 * w_4 * -inf) = 0
        self.assertAlmostEqual(
            sentence_bleu(references, hypothesis, weights), 0.0, places=4
        )
        # Checks that the warning has been raised because len(hypothesis) < 4.
        try:
            self.assertWarns(UserWarning, sentence_bleu, references, hypothesis)
        except AttributeError:
            pass  # unittest.TestCase.assertWarns is only supported in Python >= 3.2.

        # Test case where n > len(hypothesis) and also n > len(reference), and
        # it's a special case where reference == hypothesis.
        references = ['John loves Mary'.split()]
        hypothesis = 'John loves Mary'.split()
        # Since no 4-gram matches were found, the result should be zero
        # exp(w_1 * 1 * w_2 * 1 * w_3 * 1 * w_4 * -inf) = 0
        self.assertAlmostEqual(
            sentence_bleu(references, hypothesis, weights), 0.0, places=4
        ) 
Example #20
Source File: test_bleu.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International
def test_partial_matches_hypothesis_longer_than_reference(self):
        references = ['John loves Mary'.split()]
        hypothesis = 'John loves Mary who loves Mike'.split()
        # Since no 4-gram matches were found, the result should be zero
        # exp(w_1 * 1 * w_2 * 1 * w_3 * 1 * w_4 * -inf) = 0
        self.assertAlmostEqual(sentence_bleu(references, hypothesis), 0.0, places=4)
        # Checks that the warning has been raised because len(reference) < 4.
        try:
            self.assertWarns(UserWarning, sentence_bleu, references, hypothesis)
        except AttributeError:
            pass  # unittest.TestCase.assertWarns is only supported in Python >= 3.2.


# @unittest.skip("Skipping fringe cases for BLEU.") 
Example #21
Source File: test_bleu.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International
def test_full_matches(self):
        # Test case where there is a 100% match
        references = ['John loves Mary'.split()]
        hypothesis = 'John loves Mary'.split()

        # Test BLEU to nth order of n-grams, where n is len(hypothesis).
        for n in range(1, len(hypothesis)):
            weights = [1.0 / n] * n  # Uniform weights.
            assert sentence_bleu(references, hypothesis, weights) == 1.0 
Example #22
Source File: test_bleu.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International
def test_zero_matches(self):
        # Test case where there are 0 matches
        references = ['The candidate has no alignment to any of the references'.split()]
        hypothesis = 'John loves Mary'.split()

        # Test BLEU to nth order of n-grams, where n is len(hypothesis).
        for n in range(1, len(hypothesis)):
            weights = [1.0 / n] * n  # Uniform weights.
            assert sentence_bleu(references, hypothesis, weights) == 0 
Example #23
Source File: matcher.py    From supervised-oie with MIT License
def bleuMatch(ref, ex, ignoreStopwords, ignoreCase):
        sRef = ref.bow()
        sEx = ex.bow()
        bleu = sentence_bleu(references = [sRef.split(' ')], hypothesis = sEx.split(' '))
        return bleu > Matcher.BLEU_THRESHOLD 
Example #24
Source File: bleu.py    From DeepPavlov with Apache License 2.0
def bleu_advanced(y_true: List[Any], y_predicted: List[Any],
                  weights: Tuple = (1,), smoothing_function=SMOOTH.method1,
                  auto_reweigh=False, penalty=True) -> float:
    """Calculate BLEU score

    Parameters:
        y_true: list of reference tokens
        y_predicted: list of query tokens
        weights: n-gram weights
        smoothing_function: SmoothingFunction
        auto_reweigh: Option to re-normalize the weights uniformly
        penalty: whether to apply the brevity penalty

    Return:
        BLEU score
    """

    bleu_measure = sentence_bleu([y_true], y_predicted, weights, smoothing_function, auto_reweigh)

    hyp_len = len(y_predicted)
    hyp_lengths = hyp_len
    ref_lengths = closest_ref_length([y_true], hyp_len)

    bpenalty = brevity_penalty(ref_lengths, hyp_lengths)

    if penalty is True or bpenalty == 0:
        return bleu_measure

    return bleu_measure / bpenalty 
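The closing division above strips the brevity penalty back out of the NLTK score. A hedged, self-contained sketch of the same manipulation using NLTK's own closest_ref_length and brevity_penalty helpers (toy sentences, not from DeepPavlov) follows; it shows the effect when the hypothesis is much shorter than the reference:

from nltk.translate.bleu_score import (sentence_bleu, SmoothingFunction,
                                       closest_ref_length, brevity_penalty)

reference = "the quick brown fox jumps over the lazy dog".split()
hypothesis = "the quick brown fox".split()
smooth = SmoothingFunction().method1

score = sentence_bleu([reference], hypothesis, weights=(0.5, 0.5),
                      smoothing_function=smooth)

# Recompute the brevity penalty and divide it back out, as penalty=False does above.
bp = brevity_penalty(closest_ref_length([reference], len(hypothesis)), len(hypothesis))
print("with penalty:   ", score)
print("without penalty:", score if bp == 0 else score / bp)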
Example #25
Source File: utils.py    From quick-nlp with MIT License
def print_dialogue_batch(learner: Learner, modeldata: ModelData, input_field, output_field, num_batches=1,
                         num_sentences=-1, is_test=False,
                         num_beams=1, smoothing_function=None, weights=None):
    weights = (1 / 3., 1 / 3., 1 / 3.) if weights is None else weights
    smoothing_function = SmoothingFunction().method1 if smoothing_function is None else smoothing_function
    predictions, targets, inputs = learner.predict_with_targs_and_inputs(is_test=is_test, num_beams=num_beams)
    blue_scores = []
    for batch_num, (input, target, prediction) in enumerate(zip(inputs, targets, predictions)):
        input = np.transpose(input, [1, 2, 0])  # transpose number of utterances to beams [sl, bs, nb]
        inputs_str: BatchBeamTokens = modeldata.itos(input, input_field)
        inputs_str: List[str] = ["\n".join(conv) for conv in inputs_str]
        predictions_str: BatchBeamTokens = modeldata.itos(prediction, output_field)
        targets_str: BatchBeamTokens = modeldata.itos(target, output_field)
        for index, (inp, targ, pred) in enumerate(zip(inputs_str, targets_str, predictions_str)):
            if targ[0].split() == pred[0].split()[1:]:
                blue_score = 1
            else:
                blue_score = sentence_bleu([targ[0].split()], pred[0].split()[1:],
                                           smoothing_function=smoothing_function,
                                           weights=weights
                                           )
            print(
                f'BATCH: {batch_num} SAMPLE : {index}\nINPUT:\n{"".join(inp)}\nTARGET:\n{"".join(targ)}\nPREDICTION:\n{"".join(pred)}\nbleu: {blue_score}\n\n')
            blue_scores.append(blue_score)
            if 0 < num_sentences <= index - 1:
                break
        if 0 < num_batches <= batch_num - 1:
            break
    print(f'bleu score: mean: {np.mean(blue_scores)}, std: {np.std(blue_scores)}') 
Example #26
Source File: bleu.py    From dialogbot with Apache License 2.0
def bleu_score(candidate, reference):
    score = sentence_bleu(
        [list(reference)], list(candidate),
        weights=(0.25, 0.25, 0.25, 0.25),
        smoothing_function=SmoothingFunction().method1)
    return score 
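The wrapper above is character-level BLEU: wrapping the strings in list() splits them into single characters, so the n-grams are character n-grams, which is a common choice for unsegmented Chinese text. A short usage sketch with invented strings:

from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

candidate = "今天天气很好"
reference = "今天天气不错"

# list() turns each string into a sequence of characters,
# so BLEU is computed over character n-grams.
score = sentence_bleu([list(reference)], list(candidate),
                      weights=(0.25, 0.25, 0.25, 0.25),
                      smoothing_function=SmoothingFunction().method1)
print(score)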
Example #27
Source File: metrics.py    From quick-nlp with MIT License
def bleu_score(preds, targs, stoi=None):
    sf = SmoothingFunction().method1
    preds = torch.max(preds, dim=-1)[1][:-1]
    bleus = []
    for targ, pred in zip(to_np(targs), to_np(preds)):
        if len(targ) > 2:
            bleu = sentence_bleu([targ], pred, smoothing_function=sf, weights=(1 / 3., 1 / 3., 1 / 3.))
        elif len(targ) == 2:
            bleu = sentence_bleu([targ], pred, smoothing_function=sf, weights=(0.5, 0.5))
        else:
            bleu = sentence_bleu([targ], pred, smoothing_function=sf, weights=(1.0,))
        bleus.append(bleu)
    return np.mean(bleus) 
Example #28
Source File: process_samples.py    From texar with Apache License 2.0
def sample_from_hamming_distance_payoff_distribution(args):
    src_sents = read_corpus(args.src, 'src')
    tgt_sents = read_corpus(args.tgt, 'src')  # do not read in <s> and </s>
    f_out = open(args.output, 'w')

    vocab = torch.load(args.vocab)
    tgt_vocab = vocab.tgt

    payoff_prob, Z_qs = generate_hamming_distance_payoff_distribution(max(len(sent) for sent in tgt_sents),
                                                                      vocab_size=len(vocab.tgt),
                                                                      tau=args.temp)

    for src_sent, tgt_sent in zip(src_sents, tgt_sents):
        tgt_samples = []  # make sure the ground truth y* is in the samples
        tgt_sent_len = len(tgt_sent) - 3  # remove <s> and </s> and ending period .
        tgt_ref_tokens = tgt_sent[1:-1]
        bleu_scores = []

        # sample edit distances
        e_samples = np.random.choice(range(tgt_sent_len + 1), p=payoff_prob[tgt_sent_len], size=args.sample_size,
                                     replace=True)

        for i, e in enumerate(e_samples):
            if e > 0:
                # sample a new tgt_sent $y$
                old_word_pos = np.random.choice(range(1, tgt_sent_len + 1), size=e, replace=False)
                new_words = [vocab.tgt.id2word[wid] for wid in np.random.randint(3, len(vocab.tgt), size=e)]
                new_tgt_sent = list(tgt_sent)
                for pos, word in zip(old_word_pos, new_words):
                    new_tgt_sent[pos] = word

                bleu_score = sentence_bleu([tgt_ref_tokens], new_tgt_sent[1:-1])
                bleu_scores.append(bleu_score)
            else:
                new_tgt_sent = list(tgt_sent)
                bleu_scores.append(1.)

            # print('y: %s' % ' '.join(new_tgt_sent))
            tgt_samples.append(new_tgt_sent) 
Example #29
Source File: model.py    From RLSeq2Seq with MIT License
def reward_function(self, reference, summary, measure='rouge_l/f_score'):
    """Calculate the reward between the reference and summary.

    Args:
      reference: A list of ids representing the ground-truth data
      summary: A list of ids representing the model generated data

    Returns:
      A single value representing the evaluation value for reference and summary
    """
    if 'rouge' in measure:
      return rouge([summary],[reference])[measure]
    else:
      return sentence_bleu([reference.split()],summary.split(),weights=(0.25,0.25,0.25,0.25)) 
Example #30
Source File: bleu.py    From cotk with Apache License 2.0
def _sentence_bleu(ele):
	'''Auxiliary function for computing sentence bleu:

	Arguments:
		ele (tuple): A tuple (`reference sentences`, `a hypothesis sentence`).

	Returns:

		* float: **sentence-bleu** value.
	'''

	return sentence_bleu(ele[0], ele[1], weights=ele[2], smoothing_function=SmoothingFunction().method1)
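The tuple argument above exists so the helper can be mapped over a multiprocessing pool, one (reference sentences, hypothesis, weights) task per sentence. The packing shown below is an assumption inferred from the signature, not taken from the cotk source:

from multiprocessing import Pool

from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

def _sentence_bleu(ele):
    # ele = (reference sentences, hypothesis sentence, weights)
    return sentence_bleu(ele[0], ele[1], weights=ele[2],
                         smoothing_function=SmoothingFunction().method1)

if __name__ == "__main__":
    refs = [["the cat sat on the mat".split()],
            ["dogs are playing outside".split()]]
    hyps = ["a cat sat on the mat".split(),
            "the dogs play outside".split()]
    weights = (0.25, 0.25, 0.25, 0.25)
    tasks = [(r, h, weights) for r, h in zip(refs, hyps)]
    with Pool(processes=2) as pool:
        print(pool.map(_sentence_bleu, tasks))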