Python nltk.compat.izip() Examples

The following are code examples for showing how to use nltk.compat.izip(). They are from open source Python projects. You can vote up the examples you like or vote down the ones you don't like.

Example 1
Project: razzy-spinner   Author: rafasashi   File: scores.py    GNU General Public License v3.0 6 votes vote down vote up
def accuracy(reference, test):
    """
    Return the fraction of positions at which ``test`` matches ``reference``.

    Given a list of reference values and a corresponding list of test
    values, return the fraction of indices ``0 <= i < len(test)`` such
    that ``test[i] == reference[i]``.

    :type reference: list
    :param reference: An ordered list of reference values.
    :type test: list
    :param test: A list of values to compare against the corresponding
        reference values.
    :rtype: float
    :return: The number of matching positions divided by ``len(test)``.
    :raise ValueError: If ``reference`` and ``test`` do not have the
        same length.
    :raise ZeroDivisionError: If both lists are empty.
    """
    if len(reference) != len(test):
        raise ValueError("Lists must have the same length.")
    # The lengths were checked above, so the built-in ``zip`` pairs every
    # element.  ``float`` forces true division even on interpreters where
    # ``/`` between two ints truncates.
    matches = sum(x == y for x, y in zip(reference, test))
    return float(matches) / len(test)
Example 2
Project: razzy-spinner   Author: rafasashi   File: scores.py    GNU General Public License v3.0 6 votes vote down vote up
def log_likelihood(reference, test):
    """
    Compute the mean log likelihood of the reference values under the
    paired probability distributions.

    :param reference: A list of reference values
    :type reference: list
    :param test: A list of probability distributions, one per reference
        value; each must provide ``logprob(value)``.
    :type test: list(ProbDistI)
    :return: The sum of ``dist.logprob(val)`` over all pairs, divided by
        ``len(reference)``.
    :raise ValueError: If ``reference`` and ``test`` differ in length.
    """
    expected, actual = len(reference), len(test)
    if expected != actual:
        raise ValueError("Lists must have the same length.")

    # Accumulate dist.logprob(val) pair by pair, then average.
    running_total = 0
    for value, distribution in izip(reference, test):
        running_total = running_total + distribution.logprob(value)
    return running_total / expected
Example 3
Project: razzy-spinner   Author: rafasashi   File: test_json2csv_corpus.py    GNU General Public License v3.0 6 votes vote down vote up
def are_files_identical(filename1, filename2, debug=False):
    """
    Compare two files, ignoring line order and surrounding whitespace
    (including carriage returns).

    The lines of each file are read as bytes and sorted; the two sorted
    lists are then compared pairwise after stripping leading and trailing
    whitespace from each line.  Files with a different number of lines
    are never considered identical.

    :param filename1: Path of the first file (labelled "Output file" in
        debug messages).
    :param filename2: Path of the second file (labelled "Refer. file" in
        debug messages).
    :param debug: If true, print the first differing pair of lines, or a
        note that the line counts differ.
    :type debug: bool
    :return: True if both files have the same number of lines and every
        pair of sorted, stripped lines is equal; False otherwise.
    :rtype: bool
    """
    with open(filename1, "rb") as fileA:
        linesA = sorted(fileA.readlines())
    with open(filename2, "rb") as fileB:
        linesB = sorted(fileB.readlines())

    for lineA, lineB in zip(linesA, linesB):
        if lineA.strip() != lineB.strip():
            if debug:
                print("Error while comparing files. " +
                      "First difference at line below.")
                print("=> Output file line: {0}".format(lineA))
                print("=> Refer. file line: {0}".format(lineB))
            return False

    # zip() stops at the shorter list, so surplus lines in either file
    # would otherwise go unnoticed.
    if len(linesA) != len(linesB):
        if debug:
            print("Error while comparing files. " +
                  "Files have a different number of lines.")
        return False
    return True
Example 4
Project: OpenBottle   Author: xiaozhuchacha   File: scores.py    MIT License 6 votes vote down vote up
def accuracy(reference, test):
    """
    Return the fraction of positions at which ``test`` matches ``reference``.

    Given a list of reference values and a corresponding list of test
    values, return the fraction of indices ``0 <= i < len(test)`` such
    that ``test[i] == reference[i]``.

    :type reference: list
    :param reference: An ordered list of reference values.
    :type test: list
    :param test: A list of values to compare against the corresponding
        reference values.
    :rtype: float
    :return: The number of matching positions divided by ``len(test)``.
    :raise ValueError: If ``reference`` and ``test`` do not have the
        same length.
    :raise ZeroDivisionError: If both lists are empty.
    """
    if len(reference) != len(test):
        raise ValueError("Lists must have the same length.")
    # The lengths were checked above, so the built-in ``zip`` pairs every
    # element.  ``float`` forces true division even on interpreters where
    # ``/`` between two ints truncates.
    matches = sum(x == y for x, y in zip(reference, test))
    return float(matches) / len(test)
Example 5
Project: OpenBottle   Author: xiaozhuchacha   File: scores.py    MIT License 6 votes vote down vote up
def log_likelihood(reference, test):
    """
    Compute the mean log likelihood of the reference values under the
    paired probability distributions.

    :param reference: A list of reference values
    :type reference: list
    :param test: A list of probability distributions, one per reference
        value; each must provide ``logprob(value)``.
    :type test: list(ProbDistI)
    :return: The sum of ``dist.logprob(val)`` over all pairs, divided by
        ``len(reference)``.
    :raise ValueError: If ``reference`` and ``test`` differ in length.
    """
    expected, actual = len(reference), len(test)
    if expected != actual:
        raise ValueError("Lists must have the same length.")

    # Accumulate dist.logprob(val) pair by pair, then average.
    running_total = 0
    for value, distribution in izip(reference, test):
        running_total = running_total + distribution.logprob(value)
    return running_total / expected
Example 6
Project: OpenBottle   Author: xiaozhuchacha   File: test_json2csv_corpus.py    MIT License 6 votes vote down vote up
def are_files_identical(filename1, filename2, debug=False):
    """
    Compare two files, ignoring line order and surrounding whitespace
    (including carriage returns).

    The lines of each file are read as bytes and sorted; the two sorted
    lists are then compared pairwise after stripping leading and trailing
    whitespace from each line.  Files with a different number of lines
    are never considered identical.

    :param filename1: Path of the first file (labelled "Output file" in
        debug messages).
    :param filename2: Path of the second file (labelled "Refer. file" in
        debug messages).
    :param debug: If true, print the first differing pair of lines, or a
        note that the line counts differ.
    :type debug: bool
    :return: True if both files have the same number of lines and every
        pair of sorted, stripped lines is equal; False otherwise.
    :rtype: bool
    """
    with open(filename1, "rb") as fileA:
        linesA = sorted(fileA.readlines())
    with open(filename2, "rb") as fileB:
        linesB = sorted(fileB.readlines())

    for lineA, lineB in zip(linesA, linesB):
        if lineA.strip() != lineB.strip():
            if debug:
                print("Error while comparing files. " +
                      "First difference at line below.")
                print("=> Output file line: {0}".format(lineA))
                print("=> Refer. file line: {0}".format(lineB))
            return False

    # zip() stops at the shorter list, so surplus lines in either file
    # would otherwise go unnoticed.
    if len(linesA) != len(linesB):
        if debug:
            print("Error while comparing files. " +
                  "Files have a different number of lines.")
        return False
    return True
Example 7
Project: OpenBottle   Author: xiaozhuchacha   File: scores.py    MIT License 6 votes vote down vote up
def accuracy(reference, test):
    """
    Return the fraction of positions at which ``test`` matches ``reference``.

    Given a list of reference values and a corresponding list of test
    values, return the fraction of indices ``0 <= i < len(test)`` such
    that ``test[i] == reference[i]``.

    :type reference: list
    :param reference: An ordered list of reference values.
    :type test: list
    :param test: A list of values to compare against the corresponding
        reference values.
    :rtype: float
    :return: The number of matching positions divided by ``len(test)``.
    :raise ValueError: If ``reference`` and ``test`` do not have the
        same length.
    :raise ZeroDivisionError: If both lists are empty.
    """
    if len(reference) != len(test):
        raise ValueError("Lists must have the same length.")
    # The lengths were checked above, so the built-in ``zip`` pairs every
    # element.  ``float`` forces true division even on interpreters where
    # ``/`` between two ints truncates.
    matches = sum(x == y for x, y in zip(reference, test))
    return float(matches) / len(test)
Example 8
Project: OpenBottle   Author: xiaozhuchacha   File: scores.py    MIT License 6 votes vote down vote up
def log_likelihood(reference, test):
    """
    Compute the mean log likelihood of the reference values under the
    paired probability distributions.

    :param reference: A list of reference values
    :type reference: list
    :param test: A list of probability distributions, one per reference
        value; each must provide ``logprob(value)``.
    :type test: list(ProbDistI)
    :return: The sum of ``dist.logprob(val)`` over all pairs, divided by
        ``len(reference)``.
    :raise ValueError: If ``reference`` and ``test`` differ in length.
    """
    expected, actual = len(reference), len(test)
    if expected != actual:
        raise ValueError("Lists must have the same length.")

    # Accumulate dist.logprob(val) pair by pair, then average.
    running_total = 0
    for value, distribution in izip(reference, test):
        running_total = running_total + distribution.logprob(value)
    return running_total / expected
Example 9
Project: OpenBottle   Author: xiaozhuchacha   File: test_json2csv_corpus.py    MIT License 6 votes vote down vote up
def are_files_identical(filename1, filename2, debug=False):
    """
    Compare two files, ignoring line order and surrounding whitespace
    (including carriage returns).

    The lines of each file are read as bytes and sorted; the two sorted
    lists are then compared pairwise after stripping leading and trailing
    whitespace from each line.  Files with a different number of lines
    are never considered identical.

    :param filename1: Path of the first file (labelled "Output file" in
        debug messages).
    :param filename2: Path of the second file (labelled "Refer. file" in
        debug messages).
    :param debug: If true, print the first differing pair of lines, or a
        note that the line counts differ.
    :type debug: bool
    :return: True if both files have the same number of lines and every
        pair of sorted, stripped lines is equal; False otherwise.
    :rtype: bool
    """
    with open(filename1, "rb") as fileA:
        linesA = sorted(fileA.readlines())
    with open(filename2, "rb") as fileB:
        linesB = sorted(fileB.readlines())

    for lineA, lineB in zip(linesA, linesB):
        if lineA.strip() != lineB.strip():
            if debug:
                print("Error while comparing files. " +
                      "First difference at line below.")
                print("=> Output file line: {0}".format(lineA))
                print("=> Refer. file line: {0}".format(lineB))
            return False

    # zip() stops at the shorter list, so surplus lines in either file
    # would otherwise go unnoticed.
    if len(linesA) != len(linesB):
        if debug:
            print("Error while comparing files. " +
                  "Files have a different number of lines.")
        return False
    return True
Example 10
Project: FancyWord   Author: EastonLee   File: scores.py    GNU General Public License v3.0 6 votes vote down vote up
def accuracy(reference, test):
    """
    Return the fraction of positions at which ``test`` matches ``reference``.

    Given a list of reference values and a corresponding list of test
    values, return the fraction of indices ``0 <= i < len(test)`` such
    that ``test[i] == reference[i]``.

    :type reference: list
    :param reference: An ordered list of reference values.
    :type test: list
    :param test: A list of values to compare against the corresponding
        reference values.
    :rtype: float
    :return: The number of matching positions divided by ``len(test)``.
    :raise ValueError: If ``reference`` and ``test`` do not have the
        same length.
    :raise ZeroDivisionError: If both lists are empty.
    """
    if len(reference) != len(test):
        raise ValueError("Lists must have the same length.")
    # The lengths were checked above, so the built-in ``zip`` pairs every
    # element.  ``float`` forces true division even on interpreters where
    # ``/`` between two ints truncates.
    matches = sum(x == y for x, y in zip(reference, test))
    return float(matches) / len(test)
Example 11
Project: FancyWord   Author: EastonLee   File: scores.py    GNU General Public License v3.0 6 votes vote down vote up
def log_likelihood(reference, test):
    """
    Compute the mean log likelihood of the reference values under the
    paired probability distributions.

    :param reference: A list of reference values
    :type reference: list
    :param test: A list of probability distributions, one per reference
        value; each must provide ``logprob(value)``.
    :type test: list(ProbDistI)
    :return: The sum of ``dist.logprob(val)`` over all pairs, divided by
        ``len(reference)``.
    :raise ValueError: If ``reference`` and ``test`` differ in length.
    """
    expected, actual = len(reference), len(test)
    if expected != actual:
        raise ValueError("Lists must have the same length.")

    # Accumulate dist.logprob(val) pair by pair, then average.
    running_total = 0
    for value, distribution in izip(reference, test):
        running_total = running_total + distribution.logprob(value)
    return running_total / expected
Example 12
Project: honours_project   Author: JFriel   File: scores.py    GNU General Public License v3.0 6 votes vote down vote up
def accuracy(reference, test):
    """
    Return the fraction of positions at which ``test`` matches ``reference``.

    Given a list of reference values and a corresponding list of test
    values, return the fraction of indices ``0 <= i < len(test)`` such
    that ``test[i] == reference[i]``.

    :type reference: list
    :param reference: An ordered list of reference values.
    :type test: list
    :param test: A list of values to compare against the corresponding
        reference values.
    :rtype: float
    :return: The number of matching positions divided by ``len(test)``.
    :raise ValueError: If ``reference`` and ``test`` do not have the
        same length.
    :raise ZeroDivisionError: If both lists are empty.
    """
    if len(reference) != len(test):
        raise ValueError("Lists must have the same length.")
    # The lengths were checked above, so the built-in ``zip`` pairs every
    # element.  ``float`` forces true division even on interpreters where
    # ``/`` between two ints truncates.
    matches = sum(x == y for x, y in zip(reference, test))
    return float(matches) / len(test)
Example 13
Project: honours_project   Author: JFriel   File: scores.py    GNU General Public License v3.0 6 votes vote down vote up
def log_likelihood(reference, test):
    """
    Compute the mean log likelihood of the reference values under the
    paired probability distributions.

    :param reference: A list of reference values
    :type reference: list
    :param test: A list of probability distributions, one per reference
        value; each must provide ``logprob(value)``.
    :type test: list(ProbDistI)
    :return: The sum of ``dist.logprob(val)`` over all pairs, divided by
        ``len(reference)``.
    :raise ValueError: If ``reference`` and ``test`` differ in length.
    """
    expected, actual = len(reference), len(test)
    if expected != actual:
        raise ValueError("Lists must have the same length.")

    # Accumulate dist.logprob(val) pair by pair, then average.
    running_total = 0
    for value, distribution in izip(reference, test):
        running_total = running_total + distribution.logprob(value)
    return running_total / expected
Example 14
Project: honours_project   Author: JFriel   File: test_json2csv_corpus.py    GNU General Public License v3.0 6 votes vote down vote up
def are_files_identical(filename1, filename2, debug=False):
    """
    Compare two files, ignoring line order and surrounding whitespace
    (including carriage returns).

    The lines of each file are read as bytes and sorted; the two sorted
    lists are then compared pairwise after stripping leading and trailing
    whitespace from each line.  Files with a different number of lines
    are never considered identical.

    :param filename1: Path of the first file (labelled "Output file" in
        debug messages).
    :param filename2: Path of the second file (labelled "Refer. file" in
        debug messages).
    :param debug: If true, print the first differing pair of lines, or a
        note that the line counts differ.
    :type debug: bool
    :return: True if both files have the same number of lines and every
        pair of sorted, stripped lines is equal; False otherwise.
    :rtype: bool
    """
    with open(filename1, "rb") as fileA:
        linesA = sorted(fileA.readlines())
    with open(filename2, "rb") as fileB:
        linesB = sorted(fileB.readlines())

    for lineA, lineB in zip(linesA, linesB):
        if lineA.strip() != lineB.strip():
            if debug:
                print("Error while comparing files. " +
                      "First difference at line below.")
                print("=> Output file line: {0}".format(lineA))
                print("=> Refer. file line: {0}".format(lineB))
            return False

    # zip() stops at the shorter list, so surplus lines in either file
    # would otherwise go unnoticed.
    if len(linesA) != len(linesB):
        if debug:
            print("Error while comparing files. " +
                  "Files have a different number of lines.")
        return False
    return True
Example 15
Project: honours_project   Author: JFriel   File: scores.py    GNU General Public License v3.0 6 votes vote down vote up
def accuracy(reference, test):
    """
    Return the fraction of positions at which ``test`` matches ``reference``.

    Given a list of reference values and a corresponding list of test
    values, return the fraction of indices ``0 <= i < len(test)`` such
    that ``test[i] == reference[i]``.

    :type reference: list
    :param reference: An ordered list of reference values.
    :type test: list
    :param test: A list of values to compare against the corresponding
        reference values.
    :rtype: float
    :return: The number of matching positions divided by ``len(test)``.
    :raise ValueError: If ``reference`` and ``test`` do not have the
        same length.
    :raise ZeroDivisionError: If both lists are empty.
    """
    if len(reference) != len(test):
        raise ValueError("Lists must have the same length.")
    # The lengths were checked above, so the built-in ``zip`` pairs every
    # element.  ``float`` forces true division even on interpreters where
    # ``/`` between two ints truncates.
    matches = sum(x == y for x, y in zip(reference, test))
    return float(matches) / len(test)
Example 16
Project: honours_project   Author: JFriel   File: scores.py    GNU General Public License v3.0 6 votes vote down vote up
def log_likelihood(reference, test):
    """
    Compute the mean log likelihood of the reference values under the
    paired probability distributions.

    :param reference: A list of reference values
    :type reference: list
    :param test: A list of probability distributions, one per reference
        value; each must provide ``logprob(value)``.
    :type test: list(ProbDistI)
    :return: The sum of ``dist.logprob(val)`` over all pairs, divided by
        ``len(reference)``.
    :raise ValueError: If ``reference`` and ``test`` differ in length.
    """
    expected, actual = len(reference), len(test)
    if expected != actual:
        raise ValueError("Lists must have the same length.")

    # Accumulate dist.logprob(val) pair by pair, then average.
    running_total = 0
    for value, distribution in izip(reference, test):
        running_total = running_total + distribution.logprob(value)
    return running_total / expected
Example 17
Project: honours_project   Author: JFriel   File: test_json2csv_corpus.py    GNU General Public License v3.0 6 votes vote down vote up
def are_files_identical(filename1, filename2, debug=False):
    """
    Compare two files, ignoring line order and surrounding whitespace
    (including carriage returns).

    The lines of each file are read as bytes and sorted; the two sorted
    lists are then compared pairwise after stripping leading and trailing
    whitespace from each line.  Files with a different number of lines
    are never considered identical.

    :param filename1: Path of the first file (labelled "Output file" in
        debug messages).
    :param filename2: Path of the second file (labelled "Refer. file" in
        debug messages).
    :param debug: If true, print the first differing pair of lines, or a
        note that the line counts differ.
    :type debug: bool
    :return: True if both files have the same number of lines and every
        pair of sorted, stripped lines is equal; False otherwise.
    :rtype: bool
    """
    with open(filename1, "rb") as fileA:
        linesA = sorted(fileA.readlines())
    with open(filename2, "rb") as fileB:
        linesB = sorted(fileB.readlines())

    for lineA, lineB in zip(linesA, linesB):
        if lineA.strip() != lineB.strip():
            if debug:
                print("Error while comparing files. " +
                      "First difference at line below.")
                print("=> Output file line: {0}".format(lineA))
                print("=> Refer. file line: {0}".format(lineB))
            return False

    # zip() stops at the shorter list, so surplus lines in either file
    # would otherwise go unnoticed.
    if len(linesA) != len(linesB):
        if debug:
            print("Error while comparing files. " +
                  "Files have a different number of lines.")
        return False
    return True
Example 18
Project: serverless-chatbots-workshop   Author: datteswararao   File: scores.py    Apache License 2.0 6 votes vote down vote up
def accuracy(reference, test):
    """
    Return the fraction of positions at which ``test`` matches ``reference``.

    Given a list of reference values and a corresponding list of test
    values, return the fraction of indices ``0 <= i < len(test)`` such
    that ``test[i] == reference[i]``.

    :type reference: list
    :param reference: An ordered list of reference values.
    :type test: list
    :param test: A list of values to compare against the corresponding
        reference values.
    :rtype: float
    :return: The number of matching positions divided by ``len(test)``.
    :raise ValueError: If ``reference`` and ``test`` do not have the
        same length.
    :raise ZeroDivisionError: If both lists are empty.
    """
    if len(reference) != len(test):
        raise ValueError("Lists must have the same length.")
    # The lengths were checked above, so the built-in ``zip`` pairs every
    # element.  ``float`` forces true division even on interpreters where
    # ``/`` between two ints truncates.
    matches = sum(x == y for x, y in zip(reference, test))
    return float(matches) / len(test)
Example 19
Project: serverless-chatbots-workshop   Author: datteswararao   File: scores.py    Apache License 2.0 6 votes vote down vote up
def log_likelihood(reference, test):
    """
    Compute the mean log likelihood of the reference values under the
    paired probability distributions.

    :param reference: A list of reference values
    :type reference: list
    :param test: A list of probability distributions, one per reference
        value; each must provide ``logprob(value)``.
    :type test: list(ProbDistI)
    :return: The sum of ``dist.logprob(val)`` over all pairs, divided by
        ``len(reference)``.
    :raise ValueError: If ``reference`` and ``test`` differ in length.
    """
    expected, actual = len(reference), len(test)
    if expected != actual:
        raise ValueError("Lists must have the same length.")

    # Accumulate dist.logprob(val) pair by pair, then average.
    running_total = 0
    for value, distribution in izip(reference, test):
        running_total = running_total + distribution.logprob(value)
    return running_total / expected
Example 20
Project: serverless-chatbots-workshop   Author: datteswararao   File: test_json2csv_corpus.py    Apache License 2.0 6 votes vote down vote up
def are_files_identical(filename1, filename2, debug=False):
    """
    Compare two files, ignoring line order and surrounding whitespace
    (including carriage returns).

    The lines of each file are read as bytes and sorted; the two sorted
    lists are then compared pairwise after stripping leading and trailing
    whitespace from each line.  Files with a different number of lines
    are never considered identical.

    :param filename1: Path of the first file (labelled "Output file" in
        debug messages).
    :param filename2: Path of the second file (labelled "Refer. file" in
        debug messages).
    :param debug: If true, print the first differing pair of lines, or a
        note that the line counts differ.
    :type debug: bool
    :return: True if both files have the same number of lines and every
        pair of sorted, stripped lines is equal; False otherwise.
    :rtype: bool
    """
    with open(filename1, "rb") as fileA:
        linesA = sorted(fileA.readlines())
    with open(filename2, "rb") as fileB:
        linesB = sorted(fileB.readlines())

    for lineA, lineB in zip(linesA, linesB):
        if lineA.strip() != lineB.strip():
            if debug:
                print("Error while comparing files. " +
                      "First difference at line below.")
                print("=> Output file line: {0}".format(lineA))
                print("=> Refer. file line: {0}".format(lineB))
            return False

    # zip() stops at the shorter list, so surplus lines in either file
    # would otherwise go unnoticed.
    if len(linesA) != len(linesB):
        if debug:
            print("Error while comparing files. " +
                  "Files have a different number of lines.")
        return False
    return True
Example 21
Project: serverless-chatbots-workshop   Author: datteswararao   File: scores.py    Apache License 2.0 6 votes vote down vote up
def accuracy(reference, test):
    """
    Return the fraction of positions at which ``test`` matches ``reference``.

    Given a list of reference values and a corresponding list of test
    values, return the fraction of indices ``0 <= i < len(test)`` such
    that ``test[i] == reference[i]``.

    :type reference: list
    :param reference: An ordered list of reference values.
    :type test: list
    :param test: A list of values to compare against the corresponding
        reference values.
    :rtype: float
    :return: The number of matching positions divided by ``len(test)``.
    :raise ValueError: If ``reference`` and ``test`` do not have the
        same length.
    :raise ZeroDivisionError: If both lists are empty.
    """
    if len(reference) != len(test):
        raise ValueError("Lists must have the same length.")
    # The lengths were checked above, so the built-in ``zip`` pairs every
    # element.  ``float`` forces true division even on interpreters where
    # ``/`` between two ints truncates.
    matches = sum(x == y for x, y in zip(reference, test))
    return float(matches) / len(test)
Example 22
Project: serverless-chatbots-workshop   Author: datteswararao   File: scores.py    Apache License 2.0 6 votes vote down vote up
def log_likelihood(reference, test):
    """
    Compute the mean log likelihood of the reference values under the
    paired probability distributions.

    :param reference: A list of reference values
    :type reference: list
    :param test: A list of probability distributions, one per reference
        value; each must provide ``logprob(value)``.
    :type test: list(ProbDistI)
    :return: The sum of ``dist.logprob(val)`` over all pairs, divided by
        ``len(reference)``.
    :raise ValueError: If ``reference`` and ``test`` differ in length.
    """
    expected, actual = len(reference), len(test)
    if expected != actual:
        raise ValueError("Lists must have the same length.")

    # Accumulate dist.logprob(val) pair by pair, then average.
    running_total = 0
    for value, distribution in izip(reference, test):
        running_total = running_total + distribution.logprob(value)
    return running_total / expected
Example 23
Project: serverless-chatbots-workshop   Author: datteswararao   File: test_json2csv_corpus.py    Apache License 2.0 6 votes vote down vote up
def are_files_identical(filename1, filename2, debug=False):
    """
    Compare two files, ignoring line order and surrounding whitespace
    (including carriage returns).

    The lines of each file are read as bytes and sorted; the two sorted
    lists are then compared pairwise after stripping leading and trailing
    whitespace from each line.  Files with a different number of lines
    are never considered identical.

    :param filename1: Path of the first file (labelled "Output file" in
        debug messages).
    :param filename2: Path of the second file (labelled "Refer. file" in
        debug messages).
    :param debug: If true, print the first differing pair of lines, or a
        note that the line counts differ.
    :type debug: bool
    :return: True if both files have the same number of lines and every
        pair of sorted, stripped lines is equal; False otherwise.
    :rtype: bool
    """
    with open(filename1, "rb") as fileA:
        linesA = sorted(fileA.readlines())
    with open(filename2, "rb") as fileB:
        linesB = sorted(fileB.readlines())

    for lineA, lineB in zip(linesA, linesB):
        if lineA.strip() != lineB.strip():
            if debug:
                print("Error while comparing files. " +
                      "First difference at line below.")
                print("=> Output file line: {0}".format(lineA))
                print("=> Refer. file line: {0}".format(lineB))
            return False

    # zip() stops at the shorter list, so surplus lines in either file
    # would otherwise go unnoticed.
    if len(linesA) != len(linesB):
        if debug:
            print("Error while comparing files. " +
                  "Files have a different number of lines.")
        return False
    return True
Example 24
Project: razzy-spinner   Author: rafasashi   File: hmm.py    GNU General Public License v3.0 5 votes vote down vote up
def _tag(self, unlabeled_sequence):
        """
        Pair each element of ``unlabeled_sequence`` with the corresponding
        element of the path returned by ``self._best_path``.

        :param unlabeled_sequence: the sequence to tag
        :return: a list of ``(element, path_element)`` tuples
        """
        best_states = self._best_path(unlabeled_sequence)
        tagged = izip(unlabeled_sequence, best_states)
        return list(tagged)
Example 25
Project: razzy-spinner   Author: rafasashi   File: chomsky.py    GNU General Public License v3.0 5 votes vote down vote up
def generate_chomsky(times=5, line_length=72):
    """
    Print text assembled from randomly shuffled phrase lists.

    Each of the module-level strings ``leadins``, ``subjects``, ``verbs``
    and ``objects`` is split into stripped lines and shuffled.  The first
    ``times`` rows across the four shuffled lists are joined with spaces
    and printed, wrapped to ``line_length`` columns.

    :param times: number of rows (one phrase from each list) to emit
    :param line_length: width passed to ``textwrap.fill``
    """
    shuffled_columns = []
    for source_text in (leadins, subjects, verbs, objects):
        phrases = [str.strip(line) for line in source_text.splitlines()]
        random.shuffle(phrases)
        shuffled_columns.append(phrases)
    # Take one phrase from each column per row, for the first ``times`` rows.
    selected_rows = islice(izip(*shuffled_columns), 0, times)
    words = chain(*selected_rows)
    wrapped = textwrap.fill(" ".join(words), line_length)
    print(wrapped)
Example 26
Project: OpenBottle   Author: xiaozhuchacha   File: hmm.py    MIT License 5 votes vote down vote up
def _tag(self, unlabeled_sequence):
        """
        Pair each element of ``unlabeled_sequence`` with the corresponding
        element of the path returned by ``self._best_path``.

        :param unlabeled_sequence: the sequence to tag
        :return: a list of ``(element, path_element)`` tuples
        """
        best_states = self._best_path(unlabeled_sequence)
        tagged = izip(unlabeled_sequence, best_states)
        return list(tagged)
Example 27
Project: OpenBottle   Author: xiaozhuchacha   File: chomsky.py    MIT License 5 votes vote down vote up
def generate_chomsky(times=5, line_length=72):
    """
    Print text assembled from randomly shuffled phrase lists.

    Each of the module-level strings ``leadins``, ``subjects``, ``verbs``
    and ``objects`` is split into stripped lines and shuffled.  The first
    ``times`` rows across the four shuffled lists are joined with spaces
    and printed, wrapped to ``line_length`` columns.

    :param times: number of rows (one phrase from each list) to emit
    :param line_length: width passed to ``textwrap.fill``
    """
    shuffled_columns = []
    for source_text in (leadins, subjects, verbs, objects):
        phrases = [str.strip(line) for line in source_text.splitlines()]
        random.shuffle(phrases)
        shuffled_columns.append(phrases)
    # Take one phrase from each column per row, for the first ``times`` rows.
    selected_rows = islice(izip(*shuffled_columns), 0, times)
    words = chain(*selected_rows)
    wrapped = textwrap.fill(" ".join(words), line_length)
    print(wrapped)
Example 28
Project: OpenBottle   Author: xiaozhuchacha   File: hmm.py    MIT License 5 votes vote down vote up
def _tag(self, unlabeled_sequence):
        """
        Pair each element of ``unlabeled_sequence`` with the corresponding
        element of the path returned by ``self._best_path``.

        :param unlabeled_sequence: the sequence to tag
        :return: a list of ``(element, path_element)`` tuples
        """
        best_states = self._best_path(unlabeled_sequence)
        tagged = izip(unlabeled_sequence, best_states)
        return list(tagged)
Example 29
Project: OpenBottle   Author: xiaozhuchacha   File: scikitlearn.py    MIT License 5 votes vote down vote up
def train(self, labeled_featuresets):
        """
        Fit the wrapped scikit-learn estimator on labeled feature sets.

        :param labeled_featuresets: A list of ``(featureset, label)``
            where each ``featureset`` is a dict mapping strings to either
            numbers, booleans or strings.
        :return: ``self``, so that calls can be chained.
        """

        # Transpose the (featureset, label) pairs into two parallel tuples.
        columns = list(compat.izip(*labeled_featuresets))
        featuresets, labels = columns

        feature_matrix = self._vectorizer.fit_transform(featuresets)
        encoded_labels = self._encoder.fit_transform(labels)
        self._clf.fit(feature_matrix, encoded_labels)

        return self
Example 30
Project: FancyWord   Author: EastonLee   File: hmm.py    GNU General Public License v3.0 5 votes vote down vote up
def _tag(self, unlabeled_sequence):
        """
        Pair each element of ``unlabeled_sequence`` with the corresponding
        element of the path returned by ``self._best_path``.

        :param unlabeled_sequence: the sequence to tag
        :return: a list of ``(element, path_element)`` tuples
        """
        best_states = self._best_path(unlabeled_sequence)
        tagged = izip(unlabeled_sequence, best_states)
        return list(tagged)
Example 31
Project: FancyWord   Author: EastonLee   File: chomsky.py    GNU General Public License v3.0 5 votes vote down vote up
def generate_chomsky(times=5, line_length=72):
    """
    Print text assembled from randomly shuffled phrase lists.

    Each of the module-level strings ``leadins``, ``subjects``, ``verbs``
    and ``objects`` is split into stripped lines and shuffled.  The first
    ``times`` rows across the four shuffled lists are joined with spaces
    and printed, wrapped to ``line_length`` columns.

    :param times: number of rows (one phrase from each list) to emit
    :param line_length: width passed to ``textwrap.fill``
    """
    shuffled_columns = []
    for source_text in (leadins, subjects, verbs, objects):
        phrases = [str.strip(line) for line in source_text.splitlines()]
        random.shuffle(phrases)
        shuffled_columns.append(phrases)
    # Take one phrase from each column per row, for the first ``times`` rows.
    selected_rows = islice(izip(*shuffled_columns), 0, times)
    words = chain(*selected_rows)
    wrapped = textwrap.fill(" ".join(words), line_length)
    print(wrapped)
Example 32
Project: honours_project   Author: JFriel   File: hmm.py    GNU General Public License v3.0 5 votes vote down vote up
def _tag(self, unlabeled_sequence):
        """
        Pair each element of ``unlabeled_sequence`` with the corresponding
        element of the path returned by ``self._best_path``.

        :param unlabeled_sequence: the sequence to tag
        :return: a list of ``(element, path_element)`` tuples
        """
        best_states = self._best_path(unlabeled_sequence)
        tagged = izip(unlabeled_sequence, best_states)
        return list(tagged)
Example 33
Project: honours_project   Author: JFriel   File: chomsky.py    GNU General Public License v3.0 5 votes vote down vote up
def generate_chomsky(times=5, line_length=72):
    """
    Print text assembled from randomly shuffled phrase lists.

    Each of the module-level strings ``leadins``, ``subjects``, ``verbs``
    and ``objects`` is split into stripped lines and shuffled.  The first
    ``times`` rows across the four shuffled lists are joined with spaces
    and printed, wrapped to ``line_length`` columns.

    :param times: number of rows (one phrase from each list) to emit
    :param line_length: width passed to ``textwrap.fill``
    """
    shuffled_columns = []
    for source_text in (leadins, subjects, verbs, objects):
        phrases = [str.strip(line) for line in source_text.splitlines()]
        random.shuffle(phrases)
        shuffled_columns.append(phrases)
    # Take one phrase from each column per row, for the first ``times`` rows.
    selected_rows = islice(izip(*shuffled_columns), 0, times)
    words = chain(*selected_rows)
    wrapped = textwrap.fill(" ".join(words), line_length)
    print(wrapped)
Example 34
Project: honours_project   Author: JFriel   File: hmm.py    GNU General Public License v3.0 5 votes vote down vote up
def _tag(self, unlabeled_sequence):
        """
        Pair each element of ``unlabeled_sequence`` with the corresponding
        element of the path returned by ``self._best_path``.

        :param unlabeled_sequence: the sequence to tag
        :return: a list of ``(element, path_element)`` tuples
        """
        best_states = self._best_path(unlabeled_sequence)
        tagged = izip(unlabeled_sequence, best_states)
        return list(tagged)
Example 35
Project: honours_project   Author: JFriel   File: scikitlearn.py    GNU General Public License v3.0 5 votes vote down vote up
def train(self, labeled_featuresets):
        """
        Fit the wrapped scikit-learn estimator on labeled feature sets.

        :param labeled_featuresets: A list of ``(featureset, label)``
            where each ``featureset`` is a dict mapping strings to either
            numbers, booleans or strings.
        :return: ``self``, so that calls can be chained.
        """

        # Transpose the (featureset, label) pairs into two parallel tuples.
        columns = list(compat.izip(*labeled_featuresets))
        featuresets, labels = columns

        feature_matrix = self._vectorizer.fit_transform(featuresets)
        encoded_labels = self._encoder.fit_transform(labels)
        self._clf.fit(feature_matrix, encoded_labels)

        return self
Example 36
Project: weibo_scrawler_app   Author: coolspiderghy   File: evalueClassier.py    Apache License 2.0 5 votes vote down vote up
def buildClassifier_score(trainSet, devtestSet, classifier):
    """
    Train ``classifier`` on ``trainSet`` and score it on ``devtestSet``.

    :param trainSet: labeled training data passed to
        ``SklearnClassifier.train``.
    :param devtestSet: iterable of ``(data, label)`` pairs (already
        featurized and labeled) used for evaluation.
    :param classifier: the estimator wrapped with ``SklearnClassifier``.
    :return: the value of ``accuracy_score(true_labels, predicted_labels)``.
    """
    # Split the dev-test set into its data and its labels.
    dev, tag_dev = zip(*devtestSet)
    # Use the scikit-learn interface provided by NLTK.
    wrapped = SklearnClassifier(classifier)
    # Train the classifier.
    wrapped.train(trainSet)
    # Classify the dev-test data to obtain the predicted labels.
    pred = wrapped.classify_many(dev)
    # Compare the predictions with the manually annotated labels.
    return accuracy_score(tag_dev, pred)
Example 37
Project: serverless-chatbots-workshop   Author: datteswararao   File: hmm.py    Apache License 2.0 5 votes vote down vote up
def _tag(self, unlabeled_sequence):
        """
        Pair each element of ``unlabeled_sequence`` with the corresponding
        element of the path returned by ``self._best_path``.

        :param unlabeled_sequence: the sequence to tag
        :return: a list of ``(element, path_element)`` tuples
        """
        best_states = self._best_path(unlabeled_sequence)
        tagged = izip(unlabeled_sequence, best_states)
        return list(tagged)
Example 38
Project: serverless-chatbots-workshop   Author: datteswararao   File: chomsky.py    Apache License 2.0 5 votes vote down vote up
def generate_chomsky(times=5, line_length=72):
    """
    Print text assembled from randomly shuffled phrase lists.

    Each of the module-level strings ``leadins``, ``subjects``, ``verbs``
    and ``objects`` is split into stripped lines and shuffled.  The first
    ``times`` rows across the four shuffled lists are joined with spaces
    and printed, wrapped to ``line_length`` columns.

    :param times: number of rows (one phrase from each list) to emit
    :param line_length: width passed to ``textwrap.fill``
    """
    shuffled_columns = []
    for source_text in (leadins, subjects, verbs, objects):
        phrases = [str.strip(line) for line in source_text.splitlines()]
        random.shuffle(phrases)
        shuffled_columns.append(phrases)
    # Take one phrase from each column per row, for the first ``times`` rows.
    selected_rows = islice(izip(*shuffled_columns), 0, times)
    words = chain(*selected_rows)
    wrapped = textwrap.fill(" ".join(words), line_length)
    print(wrapped)
Example 39
Project: serverless-chatbots-workshop   Author: datteswararao   File: hmm.py    Apache License 2.0 5 votes vote down vote up
def _tag(self, unlabeled_sequence):
        """
        Pair each element of ``unlabeled_sequence`` with the corresponding
        element of the path returned by ``self._best_path``.

        :param unlabeled_sequence: the sequence to tag
        :return: a list of ``(element, path_element)`` tuples
        """
        best_states = self._best_path(unlabeled_sequence)
        tagged = izip(unlabeled_sequence, best_states)
        return list(tagged)
Example 40
Project: serverless-chatbots-workshop   Author: datteswararao   File: scikitlearn.py    Apache License 2.0 5 votes vote down vote up
def train(self, labeled_featuresets):
        """
        Fit the wrapped scikit-learn estimator on labeled feature sets.

        :param labeled_featuresets: A list of ``(featureset, label)``
            where each ``featureset`` is a dict mapping strings to either
            numbers, booleans or strings.
        :return: ``self``, so that calls can be chained.
        """

        # Transpose the (featureset, label) pairs into two parallel tuples.
        columns = list(compat.izip(*labeled_featuresets))
        featuresets, labels = columns

        feature_matrix = self._vectorizer.fit_transform(featuresets)
        encoded_labels = self._encoder.fit_transform(labels)
        self._clf.fit(feature_matrix, encoded_labels)

        return self
Example 41
Project: razzy-spinner   Author: rafasashi   File: hmm.py    GNU General Public License v3.0 4 votes vote down vote up
def test(self, test_sequence, verbose=False, **kwargs):
        """
        Tag the words of ``test_sequence`` with this model and print the
        accuracy of the predicted tags against the supplied ones.

        :param test_sequence: a sequence of labeled test instances
        :type test_sequence: list(list)
        :param verbose: boolean flag; when true, the supplied tagging,
            the untagged tokens, the predicted tagging and the entropy
            are printed for every sentence
        :type verbose: bool
        :param kwargs: accepted but not used by this method
        """

        def only_tags(sentence):
            return [label for (_, label) in sentence]

        test_sequence = self._transform(test_sequence)

        # Strip the tags from every sentence and let the model re-tag it.
        predicted_sequence = []
        for gold_sentence in test_sequence:
            bare_words = [word for (word, tag) in gold_sentence]
            predicted_sequence.append(self._tag(bare_words))

        if verbose:
            for gold_sentence, hmm_sentence in izip(test_sequence,
                                                    predicted_sequence):
                gold_text = ' '.join('%s/%s' % (token, tag)
                                     for (token, tag) in gold_sentence)
                print('Test:', gold_text)
                print()
                plain_text = ' '.join("%s" % token
                                      for (token, tag) in gold_sentence)
                print('Untagged:', plain_text)
                print()
                hmm_text = ' '.join('%s/%s' % (token, tag)
                                    for (token, tag) in hmm_sentence)
                print('HMM-tagged:', hmm_text)
                print()
                # Entropy is computed over the tokens alone (tags set to None).
                untagged_pairs = [(token, None)
                                  for (token, tag) in hmm_sentence]
                print('Entropy:', self.entropy(untagged_pairs))
                print()
                print('-' * 60)

        # Flatten the per-sentence tags into two parallel lists.
        gold_tags = []
        for sentence in test_sequence:
            gold_tags.extend(only_tags(sentence))
        guessed_tags = []
        for sentence in predicted_sequence:
            guessed_tags.extend(only_tags(sentence))

        acc = accuracy(gold_tags, guessed_tags)
        token_total = sum(len(sentence) for sentence in test_sequence)
        print('accuracy over %d tokens: %.2f' % (token_total, acc * 100))
Example 42
Project: OpenBottle   Author: xiaozhuchacha   File: hmm.py    MIT License 4 votes vote down vote up
def test(self, test_sequence, verbose=False, **kwargs):
        """
        Tag the words of ``test_sequence`` with this model and print the
        accuracy of the predicted tags against the supplied ones.

        :param test_sequence: a sequence of labeled test instances
        :type test_sequence: list(list)
        :param verbose: boolean flag; when true, the supplied tagging,
            the untagged tokens, the predicted tagging and the entropy
            are printed for every sentence
        :type verbose: bool
        :param kwargs: accepted but not used by this method
        """

        def only_tags(sentence):
            return [label for (_, label) in sentence]

        test_sequence = self._transform(test_sequence)

        # Strip the tags from every sentence and let the model re-tag it.
        predicted_sequence = []
        for gold_sentence in test_sequence:
            bare_words = [word for (word, tag) in gold_sentence]
            predicted_sequence.append(self._tag(bare_words))

        if verbose:
            for gold_sentence, hmm_sentence in izip(test_sequence,
                                                    predicted_sequence):
                gold_text = ' '.join('%s/%s' % (token, tag)
                                     for (token, tag) in gold_sentence)
                print('Test:', gold_text)
                print()
                plain_text = ' '.join("%s" % token
                                      for (token, tag) in gold_sentence)
                print('Untagged:', plain_text)
                print()
                hmm_text = ' '.join('%s/%s' % (token, tag)
                                    for (token, tag) in hmm_sentence)
                print('HMM-tagged:', hmm_text)
                print()
                # Entropy is computed over the tokens alone (tags set to None).
                untagged_pairs = [(token, None)
                                  for (token, tag) in hmm_sentence]
                print('Entropy:', self.entropy(untagged_pairs))
                print()
                print('-' * 60)

        # Flatten the per-sentence tags into two parallel lists.
        gold_tags = []
        for sentence in test_sequence:
            gold_tags.extend(only_tags(sentence))
        guessed_tags = []
        for sentence in predicted_sequence:
            guessed_tags.extend(only_tags(sentence))

        acc = accuracy(gold_tags, guessed_tags)
        token_total = sum(len(sentence) for sentence in test_sequence)
        print('accuracy over %d tokens: %.2f' % (token_total, acc * 100))
Example 43
Project: OpenBottle   Author: xiaozhuchacha   File: hmm.py    MIT License 4 votes vote down vote up
def test(self, test_sequence, verbose=False, **kwargs):
        """
        Tests the HiddenMarkovModelTagger instance.

        Runs ``self._tag`` on the words of every sentence in
        ``test_sequence`` (after passing it through ``self._transform``),
        optionally prints a per-sentence report, and finally prints the
        token-level accuracy of the predicted tags against the supplied tags.
        Nothing is returned; all results are printed.

        :param test_sequence: a sequence of labeled test instances
        :type test_sequence: list(list)
        :param verbose: boolean flag indicating whether testing should be
            verbose or include printed output
        :type verbose: bool
        :param kwargs: accepted but not used by this method
        """

        def tags(sent):
            return [tag for (word, tag) in sent]

        def flatten(seq):
            # from_iterable walks ``seq`` lazily rather than unpacking every
            # sentence into a separate call argument.
            return list(itertools.chain.from_iterable(seq))

        test_sequence = self._transform(test_sequence)
        predicted_sequence = [self._tag([word for (word, tag) in sent])
                              for sent in test_sequence]

        if verbose:
            for test_sent, predicted_sent in izip(test_sequence, predicted_sequence):
                print('Test:',
                    ' '.join('%s/%s' % (token, tag)
                             for (token, tag) in test_sent))
                print()
                print('Untagged:',
                    ' '.join("%s" % token for (token, tag) in test_sent))
                print()
                print('HMM-tagged:',
                    ' '.join('%s/%s' % (token, tag)
                              for (token, tag) in predicted_sent))
                print()
                # Entropy is computed over the tokens only; tags are blanked.
                print('Entropy:',
                    self.entropy([(token, None) for
                                  (token, tag) in predicted_sent]))
                print()
                print('-' * 60)

        test_tags = flatten(tags(sent) for sent in test_sequence)
        predicted_tags = flatten(tags(sent) for sent in predicted_sequence)

        count = sum(len(sent) for sent in test_sequence)
        # accuracy() divides by the number of values compared, so an empty
        # test set would raise ZeroDivisionError.  Only skip the call when
        # both lists are empty, so a length mismatch is still reported by
        # accuracy() itself.
        if test_tags or predicted_tags:
            acc = accuracy(test_tags, predicted_tags)
        else:
            acc = 0.0
        print('accuracy over %d tokens: %.2f' % (count, acc * 100))
Example 44
Project: FancyWord   Author: EastonLee   File: hmm.py    GNU General Public License v3.0 4 votes vote down vote up
def test(self, test_sequence, verbose=False, **kwargs):
        """
        Tests the HiddenMarkovModelTagger instance.

        Runs ``self._tag`` on the words of every sentence in
        ``test_sequence`` (after passing it through ``self._transform``),
        optionally prints a per-sentence report, and finally prints the
        token-level accuracy of the predicted tags against the supplied tags.
        Nothing is returned; all results are printed.

        :param test_sequence: a sequence of labeled test instances
        :type test_sequence: list(list)
        :param verbose: boolean flag indicating whether testing should be
            verbose or include printed output
        :type verbose: bool
        :param kwargs: accepted but not used by this method
        """

        def tags(sent):
            return [tag for (word, tag) in sent]

        def flatten(seq):
            # from_iterable walks ``seq`` lazily rather than unpacking every
            # sentence into a separate call argument.
            return list(itertools.chain.from_iterable(seq))

        test_sequence = self._transform(test_sequence)
        predicted_sequence = [self._tag([word for (word, tag) in sent])
                              for sent in test_sequence]

        if verbose:
            for test_sent, predicted_sent in izip(test_sequence, predicted_sequence):
                print('Test:',
                    ' '.join('%s/%s' % (token, tag)
                             for (token, tag) in test_sent))
                print()
                print('Untagged:',
                    ' '.join("%s" % token for (token, tag) in test_sent))
                print()
                print('HMM-tagged:',
                    ' '.join('%s/%s' % (token, tag)
                              for (token, tag) in predicted_sent))
                print()
                # Entropy is computed over the tokens only; tags are blanked.
                print('Entropy:',
                    self.entropy([(token, None) for
                                  (token, tag) in predicted_sent]))
                print()
                print('-' * 60)

        test_tags = flatten(tags(sent) for sent in test_sequence)
        predicted_tags = flatten(tags(sent) for sent in predicted_sequence)

        count = sum(len(sent) for sent in test_sequence)
        # accuracy() divides by the number of values compared, so an empty
        # test set would raise ZeroDivisionError.  Only skip the call when
        # both lists are empty, so a length mismatch is still reported by
        # accuracy() itself.
        if test_tags or predicted_tags:
            acc = accuracy(test_tags, predicted_tags)
        else:
            acc = 0.0
        print('accuracy over %d tokens: %.2f' % (count, acc * 100))
Example 45
Project: honours_project   Author: JFriel   File: hmm.py    GNU General Public License v3.0 4 votes vote down vote up
def test(self, test_sequence, verbose=False, **kwargs):
        """
        Tests the HiddenMarkovModelTagger instance.

        Runs ``self._tag`` on the words of every sentence in
        ``test_sequence`` (after passing it through ``self._transform``),
        optionally prints a per-sentence report, and finally prints the
        token-level accuracy of the predicted tags against the supplied tags.
        Nothing is returned; all results are printed.

        :param test_sequence: a sequence of labeled test instances
        :type test_sequence: list(list)
        :param verbose: boolean flag indicating whether testing should be
            verbose or include printed output
        :type verbose: bool
        :param kwargs: accepted but not used by this method
        """

        def tags(sent):
            return [tag for (word, tag) in sent]

        def flatten(seq):
            # from_iterable walks ``seq`` lazily rather than unpacking every
            # sentence into a separate call argument.
            return list(itertools.chain.from_iterable(seq))

        test_sequence = self._transform(test_sequence)
        predicted_sequence = [self._tag([word for (word, tag) in sent])
                              for sent in test_sequence]

        if verbose:
            for test_sent, predicted_sent in izip(test_sequence, predicted_sequence):
                print('Test:',
                    ' '.join('%s/%s' % (token, tag)
                             for (token, tag) in test_sent))
                print()
                print('Untagged:',
                    ' '.join("%s" % token for (token, tag) in test_sent))
                print()
                print('HMM-tagged:',
                    ' '.join('%s/%s' % (token, tag)
                              for (token, tag) in predicted_sent))
                print()
                # Entropy is computed over the tokens only; tags are blanked.
                print('Entropy:',
                    self.entropy([(token, None) for
                                  (token, tag) in predicted_sent]))
                print()
                print('-' * 60)

        test_tags = flatten(tags(sent) for sent in test_sequence)
        predicted_tags = flatten(tags(sent) for sent in predicted_sequence)

        count = sum(len(sent) for sent in test_sequence)
        # accuracy() divides by the number of values compared, so an empty
        # test set would raise ZeroDivisionError.  Only skip the call when
        # both lists are empty, so a length mismatch is still reported by
        # accuracy() itself.
        if test_tags or predicted_tags:
            acc = accuracy(test_tags, predicted_tags)
        else:
            acc = 0.0
        print('accuracy over %d tokens: %.2f' % (count, acc * 100))
Example 46
Project: honours_project   Author: JFriel   File: hmm.py    GNU General Public License v3.0 4 votes vote down vote up
def test(self, test_sequence, verbose=False, **kwargs):
        """
        Tests the HiddenMarkovModelTagger instance.

        Runs ``self._tag`` on the words of every sentence in
        ``test_sequence`` (after passing it through ``self._transform``),
        optionally prints a per-sentence report, and finally prints the
        token-level accuracy of the predicted tags against the supplied tags.
        Nothing is returned; all results are printed.

        :param test_sequence: a sequence of labeled test instances
        :type test_sequence: list(list)
        :param verbose: boolean flag indicating whether testing should be
            verbose or include printed output
        :type verbose: bool
        :param kwargs: accepted but not used by this method
        """

        def tags(sent):
            return [tag for (word, tag) in sent]

        def flatten(seq):
            # from_iterable walks ``seq`` lazily rather than unpacking every
            # sentence into a separate call argument.
            return list(itertools.chain.from_iterable(seq))

        test_sequence = self._transform(test_sequence)
        predicted_sequence = [self._tag([word for (word, tag) in sent])
                              for sent in test_sequence]

        if verbose:
            for test_sent, predicted_sent in izip(test_sequence, predicted_sequence):
                print('Test:',
                    ' '.join('%s/%s' % (token, tag)
                             for (token, tag) in test_sent))
                print()
                print('Untagged:',
                    ' '.join("%s" % token for (token, tag) in test_sent))
                print()
                print('HMM-tagged:',
                    ' '.join('%s/%s' % (token, tag)
                              for (token, tag) in predicted_sent))
                print()
                # Entropy is computed over the tokens only; tags are blanked.
                print('Entropy:',
                    self.entropy([(token, None) for
                                  (token, tag) in predicted_sent]))
                print()
                print('-' * 60)

        test_tags = flatten(tags(sent) for sent in test_sequence)
        predicted_tags = flatten(tags(sent) for sent in predicted_sequence)

        count = sum(len(sent) for sent in test_sequence)
        # accuracy() divides by the number of values compared, so an empty
        # test set would raise ZeroDivisionError.  Only skip the call when
        # both lists are empty, so a length mismatch is still reported by
        # accuracy() itself.
        if test_tags or predicted_tags:
            acc = accuracy(test_tags, predicted_tags)
        else:
            acc = 0.0
        print('accuracy over %d tokens: %.2f' % (count, acc * 100))
Example 47
Project: serverless-chatbots-workshop   Author: datteswararao   File: hmm.py    Apache License 2.0 4 votes vote down vote up
def test(self, test_sequence, verbose=False, **kwargs):
        """
        Tests the HiddenMarkovModelTagger instance.

        Runs ``self._tag`` on the words of every sentence in
        ``test_sequence`` (after passing it through ``self._transform``),
        optionally prints a per-sentence report, and finally prints the
        token-level accuracy of the predicted tags against the supplied tags.
        Nothing is returned; all results are printed.

        :param test_sequence: a sequence of labeled test instances
        :type test_sequence: list(list)
        :param verbose: boolean flag indicating whether testing should be
            verbose or include printed output
        :type verbose: bool
        :param kwargs: accepted but not used by this method
        """

        def tags(sent):
            return [tag for (word, tag) in sent]

        def flatten(seq):
            # from_iterable walks ``seq`` lazily rather than unpacking every
            # sentence into a separate call argument.
            return list(itertools.chain.from_iterable(seq))

        test_sequence = self._transform(test_sequence)
        predicted_sequence = [self._tag([word for (word, tag) in sent])
                              for sent in test_sequence]

        if verbose:
            for test_sent, predicted_sent in izip(test_sequence, predicted_sequence):
                print('Test:',
                    ' '.join('%s/%s' % (token, tag)
                             for (token, tag) in test_sent))
                print()
                print('Untagged:',
                    ' '.join("%s" % token for (token, tag) in test_sent))
                print()
                print('HMM-tagged:',
                    ' '.join('%s/%s' % (token, tag)
                              for (token, tag) in predicted_sent))
                print()
                # Entropy is computed over the tokens only; tags are blanked.
                print('Entropy:',
                    self.entropy([(token, None) for
                                  (token, tag) in predicted_sent]))
                print()
                print('-' * 60)

        test_tags = flatten(tags(sent) for sent in test_sequence)
        predicted_tags = flatten(tags(sent) for sent in predicted_sequence)

        count = sum(len(sent) for sent in test_sequence)
        # accuracy() divides by the number of values compared, so an empty
        # test set would raise ZeroDivisionError.  Only skip the call when
        # both lists are empty, so a length mismatch is still reported by
        # accuracy() itself.
        if test_tags or predicted_tags:
            acc = accuracy(test_tags, predicted_tags)
        else:
            acc = 0.0
        print('accuracy over %d tokens: %.2f' % (count, acc * 100))
Example 48
Project: serverless-chatbots-workshop   Author: datteswararao   File: hmm.py    Apache License 2.0 4 votes vote down vote up
def test(self, test_sequence, verbose=False, **kwargs):
        """
        Tests the HiddenMarkovModelTagger instance.

        Runs ``self._tag`` on the words of every sentence in
        ``test_sequence`` (after passing it through ``self._transform``),
        optionally prints a per-sentence report, and finally prints the
        token-level accuracy of the predicted tags against the supplied tags.
        Nothing is returned; all results are printed.

        :param test_sequence: a sequence of labeled test instances
        :type test_sequence: list(list)
        :param verbose: boolean flag indicating whether testing should be
            verbose or include printed output
        :type verbose: bool
        :param kwargs: accepted but not used by this method
        """

        def tags(sent):
            return [tag for (word, tag) in sent]

        def flatten(seq):
            # from_iterable walks ``seq`` lazily rather than unpacking every
            # sentence into a separate call argument.
            return list(itertools.chain.from_iterable(seq))

        test_sequence = self._transform(test_sequence)
        predicted_sequence = [self._tag([word for (word, tag) in sent])
                              for sent in test_sequence]

        if verbose:
            for test_sent, predicted_sent in izip(test_sequence, predicted_sequence):
                print('Test:',
                    ' '.join('%s/%s' % (token, tag)
                             for (token, tag) in test_sent))
                print()
                print('Untagged:',
                    ' '.join("%s" % token for (token, tag) in test_sent))
                print()
                print('HMM-tagged:',
                    ' '.join('%s/%s' % (token, tag)
                              for (token, tag) in predicted_sent))
                print()
                # Entropy is computed over the tokens only; tags are blanked.
                print('Entropy:',
                    self.entropy([(token, None) for
                                  (token, tag) in predicted_sent]))
                print()
                print('-' * 60)

        test_tags = flatten(tags(sent) for sent in test_sequence)
        predicted_tags = flatten(tags(sent) for sent in predicted_sequence)

        count = sum(len(sent) for sent in test_sequence)
        # accuracy() divides by the number of values compared, so an empty
        # test set would raise ZeroDivisionError.  Only skip the call when
        # both lists are empty, so a length mismatch is still reported by
        # accuracy() itself.
        if test_tags or predicted_tags:
            acc = accuracy(test_tags, predicted_tags)
        else:
            acc = 0.0
        print('accuracy over %d tokens: %.2f' % (count, acc * 100))