Python nltk.classify Examples

The following code examples show how to use the nltk.classify module. They are drawn from open-source Python projects. You can vote up the examples you like or vote down the ones you don't.

Example 1
Project: OpenBottle   Author: xiaozhuchacha   File: util.py    MIT License 7 votes vote down vote up
def demo_sent_subjectivity(text):
    """Print the subjectivity label of a single sentence.

    Tries to reuse a pickled SentimentAnalyzer; if the pickle cannot be
    found, a fresh analyzer is trained with a Naive Bayes classifier.

    :param text: a sentence whose subjectivity has to be classified.
    """
    from nltk.classify import NaiveBayesClassifier
    from nltk.tokenize import regexp

    tokenizer = regexp.WhitespaceTokenizer()
    try:
        analyzer = load('sa_subjectivity.pickle')
    except LookupError:
        print('Cannot find the sentiment analyzer you want to load.')
        print('Training a new one using NaiveBayesClassifier.')
        analyzer = demo_subjectivity(NaiveBayesClassifier.train, True)

    # Lower-case every whitespace-separated token before classifying.
    tokens = [token.lower() for token in tokenizer.tokenize(text)]
    print(analyzer.classify(tokens))
Example 2
Project: razzy-spinner   Author: rafasashi   File: util.py    GNU General Public License v3.0 6 votes vote down vote up
def demo_sent_subjectivity(text):
    """Print the subjectivity label of a single sentence.

    Tries to reuse a pickled SentimentAnalyzer; if the pickle cannot be
    found, a fresh analyzer is trained with a Naive Bayes classifier.

    :param text: a sentence whose subjectivity has to be classified.
    """
    from nltk.classify import NaiveBayesClassifier
    from nltk.tokenize import regexp

    tokenizer = regexp.WhitespaceTokenizer()
    try:
        analyzer = load('sa_subjectivity.pickle')
    except LookupError:
        print('Cannot find the sentiment analyzer you want to load.')
        print('Training a new one using NaiveBayesClassifier.')
        analyzer = demo_subjectivity(NaiveBayesClassifier.train, True)

    # Lower-case every whitespace-separated token before classifying.
    tokens = [token.lower() for token in tokenizer.tokenize(text)]
    print(analyzer.classify(tokens))
Example 3
Project: chattR   Author: patrickstocklin   File: classifiers.py    GNU General Public License v2.0 6 votes vote down vote up
def train(self, *args, **kwargs):
    """Train the classifier with a labeled feature set and return
    the classifier. Takes the same arguments as the wrapped NLTK class.
    This method is implicitly called when calling ``classify`` or
    ``accuracy`` methods and is included only to allow passing in arguments
    to the ``train`` method of the wrapped NLTK class.

    .. versionadded:: 0.6.2

    :rtype: A classifier
    :raises ValueError: if the instance has no usable ``nltk_class``.
    """
    # Resolve the wrapped class's train method *before* calling it, so an
    # AttributeError raised inside the training run itself is not mistaken
    # for a missing/None nltk_class and swallowed into a ValueError.
    try:
        train_method = self.nltk_class.train
    except AttributeError:
        raise ValueError("NLTKClassifier must have a nltk_class"
                         " variable that is not None.")
    self.classifier = train_method(self.train_features, *args, **kwargs)
    return self.classifier
Example 4
Project: OpenBottle   Author: xiaozhuchacha   File: util.py    MIT License 6 votes vote down vote up
def demo_sent_subjectivity(text):
    """Print the subjectivity label of a single sentence.

    Tries to reuse a pickled SentimentAnalyzer; if the pickle cannot be
    found, a fresh analyzer is trained with a Naive Bayes classifier.

    :param text: a sentence whose subjectivity has to be classified.
    """
    from nltk.classify import NaiveBayesClassifier
    from nltk.tokenize import regexp

    tokenizer = regexp.WhitespaceTokenizer()
    try:
        analyzer = load('sa_subjectivity.pickle')
    except LookupError:
        print('Cannot find the sentiment analyzer you want to load.')
        print('Training a new one using NaiveBayesClassifier.')
        analyzer = demo_subjectivity(NaiveBayesClassifier.train, True)

    # Lower-case every whitespace-separated token before classifying.
    tokens = [token.lower() for token in tokenizer.tokenize(text)]
    print(analyzer.classify(tokens))
Example 5
Project: Health-Checker   Author: KriAga   File: util.py    MIT License 6 votes vote down vote up
def demo_sent_subjectivity(text):
    """Print the subjectivity label of a single sentence.

    Tries to reuse a pickled SentimentAnalyzer; if the pickle cannot be
    found, a fresh analyzer is trained with a Naive Bayes classifier.

    :param text: a sentence whose subjectivity has to be classified.
    """
    from nltk.classify import NaiveBayesClassifier
    from nltk.tokenize import regexp

    tokenizer = regexp.WhitespaceTokenizer()
    try:
        analyzer = load('sa_subjectivity.pickle')
    except LookupError:
        print('Cannot find the sentiment analyzer you want to load.')
        print('Training a new one using NaiveBayesClassifier.')
        analyzer = demo_subjectivity(NaiveBayesClassifier.train, True)

    # Lower-case every whitespace-separated token before classifying.
    tokens = [token.lower() for token in tokenizer.tokenize(text)]
    print(analyzer.classify(tokens))
Example 6
Project: Chrono   Author: AmyOlex   File: NB_nltk_classifier.py    GNU General Public License v3.0 6 votes vote down vote up
def build_model(data_file, class_file):
    """Train a Naive Bayes classifier from a CSV of features plus a parallel
    file of integer class labels.

    :param data_file: path to a CSV file; each row becomes a feature dict.
    :param class_file: path to a text file with one integer label per line,
        expected to align row-for-row with ``data_file``.
    :return: tuple ``(classifier, dic, NB_input)`` where ``dic`` is an
        OrderedDict mapping every feature key to the string '0' and
        ``NB_input`` is the list of ``(features, label)`` training pairs.
    """
    ## Import csv files
    data_list = []
    with open(data_file) as file:
        reader = csv.DictReader(file)
        data_list = [row for row in reader]
    class_list = []
    with open(class_file) as f:
        for line in f.readlines():
            class_list.append(int(line.strip()))
    
    ## Create the input for the classifier
    # NOTE(review): if the two files disagree in length, NB_input stays
    # empty and training below will fail — confirm inputs are aligned.
    NB_input = []
    if(len(data_list)==len(class_list)):
        for i in range(0,len(data_list)):
            NB_input.append((data_list[i],class_list[i]))
    
    ## Train the classifier and return it along with the ordered dictionary keys.
    classifier = NaiveBayesClassifier.train(NB_input)
    # Accuracy is measured on the training data itself (resubstitution).
    print('accuracy:', nltk.classify.util.accuracy(classifier, NB_input))
    ## Create the empty orderedDict to pass back for use in the other methods.
    dict_keys = data_list[0].keys()

    dic = OrderedDict(zip(dict_keys, np.repeat('0',len(dict_keys))))
    
    return(classifier, dic, NB_input) 
Example 7
Project: honours_project   Author: JFriel   File: classifiers.py    GNU General Public License v3.0 6 votes vote down vote up
def train(self, *args, **kwargs):
    """Train the classifier with a labeled feature set and return
    the classifier. Takes the same arguments as the wrapped NLTK class.
    This method is implicitly called when calling ``classify`` or
    ``accuracy`` methods and is included only to allow passing in arguments
    to the ``train`` method of the wrapped NLTK class.

    .. versionadded:: 0.6.2

    :rtype: A classifier
    :raises ValueError: if the instance has no usable ``nltk_class``.
    """
    # Resolve the wrapped class's train method *before* calling it, so an
    # AttributeError raised inside the training run itself is not mistaken
    # for a missing/None nltk_class and swallowed into a ValueError.
    try:
        train_method = self.nltk_class.train
    except AttributeError:
        raise ValueError("NLTKClassifier must have a nltk_class"
                         " variable that is not None.")
    self.classifier = train_method(self.train_features, *args, **kwargs)
    return self.classifier
Example 8
Project: honours_project   Author: JFriel   File: classifiers.py    GNU General Public License v3.0 6 votes vote down vote up
def train(self, *args, **kwargs):
    """Train the classifier with a labeled feature set and return
    the classifier. Takes the same arguments as the wrapped NLTK class.
    This method is implicitly called when calling ``classify`` or
    ``accuracy`` methods and is included only to allow passing in arguments
    to the ``train`` method of the wrapped NLTK class.

    .. versionadded:: 0.6.2

    :rtype: A classifier
    :raises ValueError: if the instance has no usable ``nltk_class``.
    """
    # Resolve the wrapped class's train method *before* calling it, so an
    # AttributeError raised inside the training run itself is not mistaken
    # for a missing/None nltk_class and swallowed into a ValueError.
    try:
        train_method = self.nltk_class.train
    except AttributeError:
        raise ValueError("NLTKClassifier must have a nltk_class"
                         " variable that is not None.")
    self.classifier = train_method(self.train_features, *args, **kwargs)
    return self.classifier
Example 9
Project: Youtube-Comments-Analyzer   Author: arghodayah   File: websentiment.py    MIT License 6 votes vote down vote up
def find_sentiment(text):
    """Train a Naive Bayes sentiment classifier from the pos/neg corpora on
    disk and return the predicted label ('positive' or 'negative') for
    *text*."""

    def format_text(raw):
        # Bag-of-words feature dict: every token maps to True.
        return {word: True for word in nltk.word_tokenize(raw)}

    training_set = []
    # Positively labelled corpus, one example per line.
    with open("./pos.txt", encoding='ISO-8859-1') as handle:
        for line in handle:
            training_set.append([format_text(line), 'positive'])
    # Negatively labelled corpus, one example per line.
    with open("./neg.txt", encoding='ISO-8859-1') as handle:
        for line in handle:
            training_set.append([format_text(line), 'negative'])

    classifier = NaiveBayesClassifier.train(training_set)
    return classifier.classify(format_text(text))
Example 10
Project: aop-helpFinder   Author: jecarvaill   File: util.py    GNU General Public License v3.0 6 votes vote down vote up
def demo_sent_subjectivity(text):
    """Print the subjectivity label of a single sentence.

    Tries to reuse a pickled SentimentAnalyzer; if the pickle cannot be
    found, a fresh analyzer is trained with a Naive Bayes classifier.

    :param text: a sentence whose subjectivity has to be classified.
    """
    from nltk.classify import NaiveBayesClassifier
    from nltk.tokenize import regexp

    tokenizer = regexp.WhitespaceTokenizer()
    try:
        analyzer = load('sa_subjectivity.pickle')
    except LookupError:
        print('Cannot find the sentiment analyzer you want to load.')
        print('Training a new one using NaiveBayesClassifier.')
        analyzer = demo_subjectivity(NaiveBayesClassifier.train, True)

    # Lower-case every whitespace-separated token before classifying.
    tokens = [token.lower() for token in tokenizer.tokenize(text)]
    print(analyzer.classify(tokens))
Example 11
Project: serverless-chatbots-workshop   Author: datteswararao   File: util.py    Apache License 2.0 6 votes vote down vote up
def demo_sent_subjectivity(text):
    """Print the subjectivity label of a single sentence.

    Tries to reuse a pickled SentimentAnalyzer; if the pickle cannot be
    found, a fresh analyzer is trained with a Naive Bayes classifier.

    :param text: a sentence whose subjectivity has to be classified.
    """
    from nltk.classify import NaiveBayesClassifier
    from nltk.tokenize import regexp

    tokenizer = regexp.WhitespaceTokenizer()
    try:
        analyzer = load('sa_subjectivity.pickle')
    except LookupError:
        print('Cannot find the sentiment analyzer you want to load.')
        print('Training a new one using NaiveBayesClassifier.')
        analyzer = demo_subjectivity(NaiveBayesClassifier.train, True)

    # Lower-case every whitespace-separated token before classifying.
    tokens = [token.lower() for token in tokenizer.tokenize(text)]
    print(analyzer.classify(tokens))
Example 12
Project: serverless-chatbots-workshop   Author: datteswararao   File: util.py    Apache License 2.0 6 votes vote down vote up
def demo_sent_subjectivity(text):
    """Print the subjectivity label of a single sentence.

    Tries to reuse a pickled SentimentAnalyzer; if the pickle cannot be
    found, a fresh analyzer is trained with a Naive Bayes classifier.

    :param text: a sentence whose subjectivity has to be classified.
    """
    from nltk.classify import NaiveBayesClassifier
    from nltk.tokenize import regexp

    tokenizer = regexp.WhitespaceTokenizer()
    try:
        analyzer = load('sa_subjectivity.pickle')
    except LookupError:
        print('Cannot find the sentiment analyzer you want to load.')
        print('Training a new one using NaiveBayesClassifier.')
        analyzer = demo_subjectivity(NaiveBayesClassifier.train, True)

    # Lower-case every whitespace-separated token before classifying.
    tokens = [token.lower() for token in tokenizer.tokenize(text)]
    print(analyzer.classify(tokens))
Example 13
Project: chattR   Author: patrickstocklin   File: classifiers.py    GNU General Public License v2.0 5 votes vote down vote up
def classify(self, text):
    """Abstract hook: concrete subclasses must supply their own classify."""
    message = 'Must implement a "classify" method.'
    raise NotImplementedError(message)
Example 14
Project: chattR   Author: patrickstocklin   File: classifiers.py    GNU General Public License v2.0 5 votes vote down vote up
def classify(self, text):
    """Classify *text* by extracting its features and delegating to the
    wrapped NLTK classifier.

    :param str text: A string of text.
    """
    # Feature extraction happens here so callers can pass raw strings.
    return self.classifier.classify(self.extract_features(text))
Example 15
Project: chattR   Author: patrickstocklin   File: classifiers.py    GNU General Public License v2.0 5 votes vote down vote up
def accuracy(self, test_set, format=None):
    """Compute the accuracy on a test set.

    :param test_set: A list of tuples of the form ``(text, label)``, or a
        file pointer.
    :param format: If ``test_set`` is a filename, the file format, e.g.
        ``"csv"`` or ``"json"``. If ``None``, will attempt to detect the
        file format.
    """
    # A file-like test set is parsed first; a plain list is used as-is.
    if is_filelike(test_set):
        test_data = self._read_data(test_set)
    else:
        test_data = test_set
    labeled_features = [
        (self.extract_features(text), label) for text, label in test_data
    ]
    return nltk.classify.accuracy(self.classifier, labeled_features)
Example 16
Project: chattR   Author: patrickstocklin   File: classifiers.py    GNU General Public License v2.0 5 votes vote down vote up
def train(self, *args, **kwargs):
    """Train the wrapped positive/unlabeled NLTK classifier and return it.

    This method is implicitly called when calling ``classify`` or
    ``accuracy`` methods and is included only to allow passing in arguments
    to the ``train`` method of the wrapped NLTK class.

    :rtype: A classifier
    """
    # The wrapped class is trained from positive and unlabeled feature
    # sets plus the prior probability of the positive label.
    self.classifier = self.nltk_class.train(
        self.positive_features,
        self.unlabeled_features,
        self.positive_prob_prior,
    )
    return self.classifier
Example 17
Project: eKoNLPy   Author: entelecheia   File: mpck.py    GNU General Public License v3.0 5 votes vote down vote up
def classify(self, tokens, intensity_cutoff=1.3):
    """Score *tokens* with the wrapped classifier and derive polarity and
    intensity statistics.

    :param tokens: iterable of tokens; each becomes a True-valued feature.
    :param intensity_cutoff: minimum winning-side odds ratio required for
        a non-zero polarity.
    :return: dict with 'Polarity', 'Intensity', 'Pos score', 'Neg score'.
    """
    eps = 1e-6
    dist = self.classifier.prob_classify({tok: True for tok in tokens})
    pos_score = dist.prob(self._positive_label)
    neg_score = dist.prob(self._negative_label)
    polarity = pos_score - neg_score
    # Odds ratio of the winning side; eps guards against division by zero.
    if polarity > 0:
        intensity = pos_score / (neg_score + eps)
    else:
        intensity = neg_score / (pos_score + eps)
    # A weak win (odds ratio at or below the cutoff) is reported as neutral.
    if intensity <= intensity_cutoff:
        polarity = 0
    return {'Polarity': polarity, 'Intensity': intensity,
            'Pos score': pos_score, 'Neg score': neg_score}
Example 18
Project: nltk-book-2nd   Author: East196   File: combining-algo-with-a-vote.py    Apache License 2.0 5 votes vote down vote up
def classify(self, features):
    """Return the majority-vote label across all wrapped classifiers."""
    # One vote per underlying classifier; the most common label wins.
    votes = [clf.classify(features) for clf in self._classifiers]
    return mode(votes)
Example 19
Project: nltk-book-2nd   Author: East196   File: combining-algo-with-a-vote.py    Apache License 2.0 5 votes vote down vote up
def confidence(self, features):
    """Return the fraction of wrapped classifiers that agree with the
    winning (majority) label, as a float in (0, 1]."""
    votes = [clf.classify(features) for clf in self._classifiers]
    # Share of votes that went to the most common label.
    winning_votes = votes.count(mode(votes))
    return winning_votes / len(votes)
Example 20
Project: nltk-book-2nd   Author: East196   File: investigating-bias.py    Apache License 2.0 5 votes vote down vote up
def classify(self, features):
    """Return the majority-vote label across all wrapped classifiers."""
    # One vote per underlying classifier; the most common label wins.
    votes = [clf.classify(features) for clf in self._classifiers]
    return mode(votes)
Example 21
Project: nltk-book-2nd   Author: East196   File: investigating-bias.py    Apache License 2.0 5 votes vote down vote up
def confidence(self, features):
    """Return the fraction of wrapped classifiers that agree with the
    winning (majority) label, as a float in (0, 1]."""
    votes = [clf.classify(features) for clf in self._classifiers]
    # Share of votes that went to the most common label.
    winning_votes = votes.count(mode(votes))
    return winning_votes / len(votes)
Example 22
Project: Predicting-US-Presidential-Elections-TwitterData   Author: praveenkanamarlapudi   File: classifier.py    Apache License 2.0 5 votes vote down vote up
def train():
  positive_tweets = read_tweets('positive.txt', 'positive')
  negative_tweets = read_tweets('negative.txt', 'negative')
  print len(positive_tweets)
  print len(negative_tweets)

  #pos_train = positive_tweets[:2000]
  #neg_train = negative_tweets[:2000]
  #pos_test = positive_tweets[2001:3000]
  #neg_test = negative_tweets[2001:3000]
  pos_train = positive_tweets[:len(positive_tweets)*80/100]
  neg_train = negative_tweets[:len(negative_tweets)*80/100]
  pos_test = positive_tweets[len(positive_tweets)*80/100+1:]
  neg_test = negative_tweets[len(positive_tweets)*80/100+1:]

  training_data = pos_train + neg_train
  test_data = pos_test + neg_test

  sentim_analyzer = SentimentAnalyzer()
  all_words_neg = sentim_analyzer.all_words([mark_negation(doc) for doc in training_data])
  #print all_words_neg
  unigram_feats = sentim_analyzer.unigram_word_feats(all_words_neg, min_freq=4)
  #print unigram_feats
  print len(unigram_feats)
  sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)
  training_set = sentim_analyzer.apply_features(training_data)
  test_set = sentim_analyzer.apply_features(test_data)
  print test_set  
  trainer = NaiveBayesClassifier.train
  classifier = sentim_analyzer.train(trainer, training_set)
  for key,value in sorted(sentim_analyzer.evaluate(test_set).items()):
    print('{0}: {1}'.format(key, value))
  print sentim_analyzer.classify(tokenize_sentance('I hate driving car at night'))
  
  return sentim_analyzer 
Example 23
Project: Predicting-US-Presidential-Elections-TwitterData   Author: praveenkanamarlapudi   File: classifier.py    Apache License 2.0 5 votes vote down vote up
def classify(input):
  """Tokenize *input* and label it with the module-level classifier."""
  tokens = tokenize_sentance(input)
  return classifier.classify(tokens)
Example 24
Project: Live-Twitter-Sentiment-Analysis   Author: shreyansh26   File: sentiment_module_1.py    MIT License 5 votes vote down vote up
def classify(self, features):
    """Return the majority-vote label across all wrapped classifiers."""
    # One vote per underlying classifier; the most common label wins.
    votes = [clf.classify(features) for clf in self._classifiers]
    return mode(votes)
Example 25
Project: Live-Twitter-Sentiment-Analysis   Author: shreyansh26   File: sentiment_module_1.py    MIT License 5 votes vote down vote up
def confidence(self, features):
    """Return the fraction of wrapped classifiers that agree with the
    winning (majority) label, as a float in (0, 1]."""
    votes = [clf.classify(features) for clf in self._classifiers]
    # Share of votes that went to the most common label.
    winning_votes = votes.count(mode(votes))
    return winning_votes / len(votes)
Example 26
Project: Live-Twitter-Sentiment-Analysis   Author: shreyansh26   File: sentiment_mod.py    MIT License 5 votes vote down vote up
def classify(self, features):
    """Return the majority-vote label across all wrapped classifiers."""
    # One vote per underlying classifier; the most common label wins.
    votes = [clf.classify(features) for clf in self._classifiers]
    return mode(votes)
Example 27
Project: Live-Twitter-Sentiment-Analysis   Author: shreyansh26   File: sentiment_mod.py    MIT License 5 votes vote down vote up
def confidence(self, features):
    """Return the fraction of wrapped classifiers that agree with the
    winning (majority) label, as a float in (0, 1]."""
    votes = [clf.classify(features) for clf in self._classifiers]
    # Share of votes that went to the most common label.
    winning_votes = votes.count(mode(votes))
    return winning_votes / len(votes)
Example 28
Project: Live-Twitter-Sentiment-Analysis   Author: shreyansh26   File: sentiment_mod.py    MIT License 5 votes vote down vote up
def sentiment(text):
    """Return (label, confidence) for *text* from the voting classifier."""
    feats = find_features(text)
    label = voted_classifier.classify(feats)
    return label, voted_classifier.confidence(feats)



# SAVE ME AS sentiment_mod.py
Example 29
Project: twitter_nltk_volkswagen   Author: edlectrico   File: sentiment_train.py    Apache License 2.0 5 votes vote down vote up
def classify(self, features):
    """Return the majority-vote label across all wrapped classifiers."""
    # One vote per underlying classifier; the most common label wins.
    votes = [clf.classify(features) for clf in self._classifiers]
    return mode(votes)
Example 30
Project: twitter_nltk_volkswagen   Author: edlectrico   File: sentiment_train.py    Apache License 2.0 5 votes vote down vote up
def confidence(self, features):
    """Return the fraction of wrapped classifiers that agree with the
    winning (majority) label, as a float in (0, 1]."""
    votes = [clf.classify(features) for clf in self._classifiers]
    # Share of votes that went to the most common label.
    winning_votes = votes.count(mode(votes))
    return winning_votes / len(votes)
Example 31
Project: twitter_nltk_volkswagen   Author: edlectrico   File: sentiment_mod.py    Apache License 2.0 5 votes vote down vote up
def classify(self, features):
    """Return the majority-vote label across all wrapped classifiers."""
    # One vote per underlying classifier; the most common label wins.
    votes = [clf.classify(features) for clf in self._classifiers]
    return mode(votes)
Example 32
Project: twitter_nltk_volkswagen   Author: edlectrico   File: sentiment_mod.py    Apache License 2.0 5 votes vote down vote up
def confidence(self, features):
    """Return the fraction of wrapped classifiers that agree with the
    winning (majority) label, as a float in (0, 1]."""
    votes = [clf.classify(features) for clf in self._classifiers]
    # Share of votes that went to the most common label.
    winning_votes = votes.count(mode(votes))
    return winning_votes / len(votes)
Example 33
Project: twitter_nltk_volkswagen   Author: edlectrico   File: sentiment_mod.py    Apache License 2.0 5 votes vote down vote up
def sentiment(text):
    """Return (label, confidence) for *text* from the voting classifier."""
    feats = find_features(text)
    label = voted_classifier.classify(feats)
    return label, voted_classifier.confidence(feats)
Example 34
Project: nltk-on-gae   Author: sivu22   File: crf.py    Apache License 2.0 5 votes vote down vote up
def demo(train_size=100, test_size=100, java_home=None, mallet_home=None):
    """Train a MalletCRF tagger on Brown corpus sentences and report accuracy.

    :param train_size: number of 'news' sentences used for training.
    :param test_size: number of 'editorial' sentences used for testing.
    :param java_home: Java location, forwarded to nltk.internals.config_java.
    :param mallet_home: Mallet location, forwarded to
        nltk.classify.mallet.config_mallet.
    :return: the trained MalletCRF model (its on-disk file is deleted).
    """
    from nltk.corpus import brown
    import textwrap

    # Define a very simple feature detector
    def fd(sentence, index):
        word = sentence[index]
        return dict(word=word, suffix=word[-2:], len=len(word))

    # Let nltk know where java & mallet are.
    nltk.internals.config_java(java_home)
    nltk.classify.mallet.config_mallet(mallet_home)

    # Get the training & test corpus.  We simplify the tagset a little:
    # just the first 2 chars.
    def strip(corpus): return [[(w, t[:2]) for (w,t) in sent]
                               for sent in corpus]
    brown_train = strip(brown.tagged_sents(categories='news')[:train_size])
    brown_test = strip(brown.tagged_sents(categories='editorial')[:test_size])

    crf = MalletCRF.train(fd, brown_train, #'/tmp/crf-model',
                          transduction_type='VITERBI')
    # Tag the sixth test sentence as a sample, then score on the whole set.
    sample_output = crf.tag([w for (w,t) in brown_test[5]])
    acc = nltk.tag.accuracy(crf, brown_test)
    print('\nAccuracy: %.1f%%' % (acc*100))
    print('Sample output:')
    print(textwrap.fill(' '.join('%s/%s' % w for w in sample_output),
                        initial_indent='  ', subsequent_indent='  ')+'\n')

    # Clean up
    # The trained model file is removed; only the in-memory object survives.
    print('Clean-up: deleting', crf.filename)
    os.remove(crf.filename)

    return crf 
Example 35
Project: roorkee-bot   Author: abhishekjiitr   File: roorkee-bot.py    GNU General Public License v3.0 5 votes vote down vote up
def classify(self, features):
    """Return the most frequent label among the wrapped classifiers' votes."""
    votes = [clf.classify(features) for clf in self.classifiers]
    # most_common() is a module-level helper that picks the top vote.
    return most_common(votes)
Example 36
Project: roorkee-bot   Author: abhishekjiitr   File: roorkee-bot.py    GNU General Public License v3.0 5 votes vote down vote up
def confidence(self, features):
    """Return the fraction of classifiers agreeing with the modal vote."""
    votes = [clf.classify(features) for clf in self.classifiers]
    agreeing = votes.count(mode(votes))
    return agreeing / len(votes)
Example 37
Project: roorkee-bot   Author: abhishekjiitr   File: roorkee-bot.py    GNU General Public License v3.0 5 votes vote down vote up
def sent2vec(que):
    """Encode a question as a feature vector: focus-word indicator entries
    plus a one-hot question-type marker at indices 0-5."""
    original = que
    focus_words = getFocusWords(que)

    vec = [0] * g_index
    for word in focus_words:
        if word in d_index:
            vec[d_index[word]] = 1
            # Boost words that fail check() — presumably rarer/more
            # informative tokens; TODO confirm check()'s semantics.
            if not check(word):
                vec[d_index[word]] *= 1.5

    processed = feature(process(original))
    qtype = voted_classifier.classify(processed)
    # One-hot encode the predicted question type; unknown types are ignored.
    type_slots = {"ABBR": 0, "ENTY": 1, "HUM": 2, "NUM": 3, "DESC": 4, "LOC": 5}
    if qtype in type_slots:
        vec[type_slots[qtype]] = 1
    return vec
Example 38
Project: NLP-application-in-indian-Judiciary   Author: Teamcoders007   File: main.py    GNU General Public License v3.0 5 votes vote down vote up
def classify(self, feature):
    """Return the majority label across the wrapped classifiers."""
    ballots = [clf.classify(feature) for clf in self._classifier_]
    return mode(ballots)
Example 39
Project: NLP-application-in-indian-Judiciary   Author: Teamcoders007   File: main.py    GNU General Public License v3.0 5 votes vote down vote up
def confidence(self, feature):
    """Return the agreement ratio for the modal vote.

    NOTE(review): this reads ``self._classifier`` while the sibling
    ``classify`` reads ``self._classifier_`` — confirm which attribute
    the class actually defines.
    """
    ballots = [clf.classify(feature) for clf in self._classifier]
    top = ballots.count(mode(ballots))
    return top / len(ballots)
Example 40
Project: news-crawler   Author: nolram   File: nltk_classificador.py    MIT License 5 votes vote down vote up
def __init__(self):
        # Connect to the local MongoDB instance (default host/port).
        self.client = MongoClient()
        # Database and collection holding the crawled news articles.
        self.db = self.client.articles_news
        self.news = self.db.articles
        # Stop words loaded via the module-level helper.
        self.stop_words = _load_stop_words()
        # Kick off classification immediately on construction.
        self.classify() 
Example 41
Project: news-crawler   Author: nolram   File: nltk_classificador.py    MIT License 5 votes vote down vote up
def classificar(self, word):
    """Return the wrapped classifier's label for *word*."""
    predicted = self.classifier.classify(word)
    return predicted
Example 42
Project: luscan-devel   Author: blackye   File: crf.py    GNU General Public License v2.0 5 votes vote down vote up
def demo(train_size=100, test_size=100, java_home=None, mallet_home=None):
    """Train a MalletCRF tagger on Brown corpus sentences and report accuracy.

    (Python 2 source — note the print statements.)

    :param train_size: number of 'news' sentences used for training.
    :param test_size: number of 'editorial' sentences used for testing.
    :param java_home: Java location, forwarded to nltk.internals.config_java.
    :param mallet_home: Mallet location, forwarded to
        nltk.classify.mallet.config_mallet.
    :return: the trained MalletCRF model (its on-disk file is deleted).
    """
    from nltk.corpus import brown
    import textwrap

    # Define a very simple feature detector
    def fd(sentence, index):
        word = sentence[index]
        return dict(word=word, suffix=word[-2:], len=len(word))

    # Let nltk know where java & mallet are.
    nltk.internals.config_java(java_home)
    nltk.classify.mallet.config_mallet(mallet_home)

    # Get the training & test corpus.  We simplify the tagset a little:
    # just the first 2 chars.
    def strip(corpus): return [[(w, t[:2]) for (w,t) in sent]
                               for sent in corpus]
    brown_train = strip(brown.tagged_sents(categories='news')[:train_size])
    brown_test = strip(brown.tagged_sents(categories='editorial')[:test_size])

    crf = MalletCRF.train(fd, brown_train, #'/tmp/crf-model',
                          transduction_type='VITERBI')
    # Tag the sixth test sentence as a sample, then score on the whole set.
    sample_output = crf.tag([w for (w,t) in brown_test[5]])
    acc = nltk.tag.accuracy(crf, brown_test)
    print '\nAccuracy: %.1f%%' % (acc*100)
    print 'Sample output:'
    print textwrap.fill(' '.join('%s/%s' % w for w in sample_output),
                        initial_indent='  ', subsequent_indent='  ')+'\n'

    # Clean up
    # The trained model file is removed; only the in-memory object survives.
    print 'Clean-up: deleting', crf.filename
    os.remove(crf.filename)

    return crf 
Example 43
Project: nltk_sentiment_analysis   Author: edlectrico   File: sentiment_train.py    GNU General Public License v2.0 5 votes vote down vote up
def classify(self, features):
    """Return the majority-vote label across all wrapped classifiers."""
    # One vote per underlying classifier; the most common label wins.
    votes = [clf.classify(features) for clf in self._classifiers]
    return mode(votes)
Example 44
Project: nltk_sentiment_analysis   Author: edlectrico   File: sentiment_train.py    GNU General Public License v2.0 5 votes vote down vote up
def confidence(self, features):
    """Return the fraction of wrapped classifiers that agree with the
    winning (majority) label, as a float in (0, 1]."""
    votes = [clf.classify(features) for clf in self._classifiers]
    # Share of votes that went to the most common label.
    winning_votes = votes.count(mode(votes))
    return winning_votes / len(votes)
Example 45
Project: nltk_sentiment_analysis   Author: edlectrico   File: sentiment_mod.py    GNU General Public License v2.0 5 votes vote down vote up
def classify(self, features):
    """Return the majority-vote label across all wrapped classifiers."""
    # One vote per underlying classifier; the most common label wins.
    votes = [clf.classify(features) for clf in self._classifiers]
    return mode(votes)
Example 46
Project: nltk_sentiment_analysis   Author: edlectrico   File: sentiment_mod.py    GNU General Public License v2.0 5 votes vote down vote up
def confidence(self, features):
    """Return the fraction of wrapped classifiers that agree with the
    winning (majority) label, as a float in (0, 1]."""
    votes = [clf.classify(features) for clf in self._classifiers]
    # Share of votes that went to the most common label.
    winning_votes = votes.count(mode(votes))
    return winning_votes / len(votes)
Example 47
Project: nltk_sentiment_analysis   Author: edlectrico   File: sentiment_mod.py    GNU General Public License v2.0 5 votes vote down vote up
def sentiment(text):
    """Return (label, confidence) for *text* from the voting classifier."""
    feats = find_features(text)
    label = voted_classifier.classify(feats)
    return label, voted_classifier.confidence(feats)
Example 48
Project: nltk_sentiment_analysis   Author: edlectrico   File: nltk_movies_classification.py    GNU General Public License v2.0 5 votes vote down vote up
def confidence(self, features):
    """Return the fraction of wrapped classifiers that agree with the
    winning (majority) label, as a float in (0, 1]."""
    votes = [clf.classify(features) for clf in self._classifiers]
    # Share of votes that went to the most common label.
    winning_votes = votes.count(mode(votes))
    return winning_votes / len(votes)
Example 49
Project: nlp-services   Author: singnet   File: train_mod.py    MIT License 5 votes vote down vote up
def classify(self, features):
    """Return the majority-vote label across all wrapped classifiers."""
    # One vote per underlying classifier; the most common label wins.
    votes = [clf.classify(features) for clf in self._classifiers]
    return mode(votes)
Example 50
Project: nlp-services   Author: singnet   File: train_mod.py    MIT License 5 votes vote down vote up
def confidence(self, features):
    """Return the fraction of wrapped classifiers that agree with the
    winning (majority) label, as a float in (0, 1]."""
    votes = [clf.classify(features) for clf in self._classifiers]
    # Share of votes that went to the most common label.
    winning_votes = votes.count(mode(votes))
    return winning_votes / len(votes)


# Fetching trained dataset 
Example 51
Project: honours_project   Author: JFriel   File: classifiers.py    GNU General Public License v3.0 5 votes vote down vote up
def classify(self, text):
    """Abstract hook: concrete subclasses must supply their own classify."""
    message = 'Must implement a "classify" method.'
    raise NotImplementedError(message)
Example 52
Project: honours_project   Author: JFriel   File: classifiers.py    GNU General Public License v3.0 5 votes vote down vote up
def classify(self, text):
    """Classify *text* by extracting its features and delegating to the
    wrapped NLTK classifier.

    :param str text: A string of text.
    """
    # Feature extraction happens here so callers can pass raw strings.
    return self.classifier.classify(self.extract_features(text))
Example 53
Project: honours_project   Author: JFriel   File: classifiers.py    GNU General Public License v3.0 5 votes vote down vote up
def accuracy(self, test_set, format=None):
    """Compute the accuracy on a test set.

    :param test_set: A list of tuples of the form ``(text, label)``, or a
        file pointer.
    :param format: If ``test_set`` is a filename, the file format, e.g.
        ``"csv"`` or ``"json"``. If ``None``, will attempt to detect the
        file format.
    """
    # A file-like test set is parsed first; a plain list is used as-is.
    if is_filelike(test_set):
        test_data = self._read_data(test_set)
    else:
        test_data = test_set
    labeled_features = [
        (self.extract_features(text), label) for text, label in test_data
    ]
    return nltk.classify.accuracy(self.classifier, labeled_features)
Example 54
Project: honours_project   Author: JFriel   File: classifiers.py    GNU General Public License v3.0 5 votes vote down vote up
def train(self, *args, **kwargs):
    """Train the wrapped positive/unlabeled NLTK classifier and return it.

    This method is implicitly called when calling ``classify`` or
    ``accuracy`` methods and is included only to allow passing in arguments
    to the ``train`` method of the wrapped NLTK class.

    :rtype: A classifier
    """
    # The wrapped class is trained from positive and unlabeled feature
    # sets plus the prior probability of the positive label.
    self.classifier = self.nltk_class.train(
        self.positive_features,
        self.unlabeled_features,
        self.positive_prob_prior,
    )
    return self.classifier
Example 55
Project: honours_project   Author: JFriel   File: classifiers.py    GNU General Public License v3.0 5 votes vote down vote up
def classify(self, text):
    """Classify a string of text.

    Abstract hook: concrete classifier subclasses must override this.
    """
    raise NotImplementedError('Must implement a "classify" method.')
Example 56
Project: honours_project   Author: JFriel   File: classifiers.py    GNU General Public License v3.0 5 votes vote down vote up
def classify(self, text):
    """Return the label the wrapped classifier predicts for ``text``.

    :param str text: A string of text.
    """
    # Feature extraction and prediction in one step.
    return self.classifier.classify(self.extract_features(text))
Example 57
Project: honours_project   Author: JFriel   File: classifiers.py    GNU General Public License v3.0 5 votes vote down vote up
def accuracy(self, test_set, format=None):
    """Evaluate the classifier's accuracy on labeled examples.

    :param test_set: Either a file-like object of labeled data, or a list
        of ``(text, label)`` tuples.
    :param format: File format hint (e.g. ``"csv"`` or ``"json"``) when
        ``test_set`` is a file; autodetected when ``None``.

    .. note:: NOTE(review): ``format`` is not passed to ``_read_data``
        here — verify against the original project.
    """
    # Accept either a file of data or an in-memory list of pairs.
    data = self._read_data(test_set) if is_filelike(test_set) else test_set
    featurized = [(self.extract_features(doc), label) for doc, label in data]
    return nltk.classify.accuracy(self.classifier, featurized)
Example 58
Project: honours_project   Author: JFriel   File: classifiers.py    GNU General Public License v3.0 5 votes vote down vote up
def train(self, *args, **kwargs):
    """Fit the underlying NLTK classifier on the stored feature sets.

    Invoked implicitly by ``classify``/``accuracy``; exposed so callers
    can train explicitly.

    .. note:: NOTE(review): ``*args``/``**kwargs`` are ignored rather
        than forwarded — confirm this matches the intended contract.

    :rtype: A classifier
    """
    self.classifier = self.nltk_class.train(
        self.positive_features, self.unlabeled_features, self.positive_prob_prior
    )
    return self.classifier
Example 59
Project: Youtube-Comments-Analyzer   Author: arghodayah   File: sentiment.py    MIT License 5 votes vote down vote up
def find_sentiment(text):
    """Classify the sentiment of one or more texts with Naive Bayes.

    A classifier is trained from scratch on ``./pos.txt`` and ``./neg.txt``
    on every call.

    :param text: A list of strings.
    :return: For a single-element list, the predicted label
        (``'positive'`` or ``'negative'``). For a multi-element list, a
        ``(positive_count, negative_count)`` tuple. An empty list returns
        ``None`` (preserved from the original behavior).
    """
    def format_text(text):
        # Bag-of-words feature dict in the shape NLTK classifiers expect.
        return {word: True for word in nltk.word_tokenize(text)}

    def load_labeled(path, label):
        # One training example per line of the corpus file.
        examples = []
        with open(path, encoding='ISO-8859-1') as f:
            for line in f:
                examples.append([format_text(line), label])
        return examples

    # Train on the full positive + negative corpora.
    # NOTE(review): retraining on every call is expensive; consider caching.
    training_set = (load_labeled("./pos.txt", 'positive')
                    + load_labeled("./neg.txt", 'negative'))
    classifier = NaiveBayesClassifier.train(training_set)

    # Single element: return its predicted label directly.
    if len(text) == 1:
        return classifier.classify(format_text(text[0]))
    # Multiple elements: tally positive/negative votes.
    elif len(text) > 1:
        pos = 0
        neg = 0
        for item in text:
            if classifier.classify(format_text(item)) == 'positive':
                pos = pos + 1
            else:
                neg = neg + 1
            # Progress indicator, overwritten in place on one line.
            print(str(pos + neg) + "/" + str(len(text)), end="\r")
        return pos, neg
Example 60
Project: Youtube-Comments-Analyzer   Author: arghodayah   File: sentiment.py    MIT License 5 votes vote down vote up
def find_scores(train_ratio=0.80):
    """Train a Naive Bayes sentiment model and report evaluation scores.

    Loads ``./pos.txt`` and ``./neg.txt``, splits each class with the same
    ratio so class balance is preserved, trains on the first portion and
    evaluates on the remainder.

    :param float train_ratio: Fraction of each corpus used for training
        (default 0.80, matching the original fixed 80/20 split).
    :return: ``(accuracy, positive_f_measure, negative_f_measure)``.
    """
    def format_text(text):
        # Bag-of-words feature dict in the shape NLTK classifiers expect.
        return {word: True for word in nltk.word_tokenize(text)}

    def load_labeled(path, label):
        # One labeled example per line of the corpus file.
        with open(path, encoding='ISO-8859-1') as f:
            return [[format_text(line), label] for line in f]

    pos = load_labeled("./pos.txt", 'positive')
    neg = load_labeled("./neg.txt", 'negative')

    # Split each class separately so the train/test ratio holds per class.
    pos_cut = int(train_ratio * len(pos))
    neg_cut = int(train_ratio * len(neg))
    training_set = pos[:pos_cut] + neg[:neg_cut]
    test_set = pos[pos_cut:] + neg[neg_cut:]

    classifier = NaiveBayesClassifier.train(training_set)

    # Reference vs. predicted index sets per label, for F-measure.
    trueset = collections.defaultdict(set)
    testset = collections.defaultdict(set)
    for i, (features, label) in enumerate(test_set):
        trueset[label].add(i)
        testset[classifier.classify(features)].add(i)

    return (accuracy(classifier, test_set),
            f_measure(trueset['positive'], testset['positive']),
            f_measure(trueset['negative'], testset['negative']))
Example 61
Project: twitter-sentiment   Author: words-sdsc   File: SentMod.py    Apache License 2.0 5 votes vote down vote up
def classify(self, features):
    """Return the majority-vote label across the wrapped classifiers."""
    ballots = [voter.classify(features) for voter in self._classifiers]
    return mode(ballots)
Example 62
Project: twitter-sentiment   Author: words-sdsc   File: SentMod.py    Apache License 2.0 5 votes vote down vote up
def confidence(self, features):
    """Return the fraction of wrapped classifiers agreeing with the winner.

    Confidence is ``winning_votes / total_votes``, a float in (0, 1].
    """
    ballots = [voter.classify(features) for voter in self._classifiers]
    winner = mode(ballots)
    return ballots.count(winner) / len(ballots)
Example 63
Project: wazen   Author: anati89   File: classifiers.py    MIT License 5 votes vote down vote up
def nb_classifier(train_data, train_target, test_data, test_target):
    """
    Train and test using Naive Bayes classifier.

    Reports 10-fold cross-validation accuracy on the training data, then
    trains a final model on all training data, reports its accuracy on the
    test data, and pickles it to ``naive.pkl``.

    :param train_data: training documents
    :param train_target: training labels
    :param test_data: test documents
    :param test_target: test labels
    :return: None (results are printed; the model is dumped to disk)
    """
    train_nltk_input = format_nltk_inputs(train_data, train_target)
    kf = KFold(n_splits=10)
    # Convert once instead of twice per fold (was rebuilt every iteration).
    train_array = np.array(train_nltk_input)
    acc_list = []
    for train_indices, test_indices in kf.split(train_nltk_input):
        fold_train = train_array[train_indices]
        fold_test = train_array[test_indices]
        clf = nltk.NaiveBayesClassifier.train(fold_train)
        acc_list.append(nltk.classify.accuracy(clf, fold_test))
    # Average over the actual number of folds instead of a hard-coded 10,
    # and avoid shadowing the builtin `sum`.
    average = statistics.mean(acc_list)
    std = statistics.stdev(acc_list)
    print(f'NLTK Accuracy on train data using K-fold: {average} std: {std}')
    test_nltk_input = format_nltk_inputs(test_data, test_target)
    model = nltk.NaiveBayesClassifier.train(train_nltk_input)
    acc = nltk.classify.accuracy(model, test_nltk_input)
    print(f'NLTK accuracy on testing data {acc}')
    # Persist the final model.
    dump_model(model, 'naive.pkl')
Example 64
Project: news-crawler   Author: nolram   File: nltk_classificador.py    MIT License 4 votes vote down vote up
def classify(self):
        """Train a Naive Bayes classifier over collected news items.

        Pipeline (console messages are in Portuguese): collect news,
        gather all words, identify the top words as the feature
        vocabulary, shuffle the items, build (features, category) pairs,
        and train ``self.classifier`` on all but the first 1000 items.
        When ``DEBUG`` is set, items from ``doc_test_2.json`` are
        classified and printed.
        """
        print(u"Coletando as Notícias")
        # Fetch the raw news items used for training.
        news_items = self.collect_news()

        print(u"Coletando todas as palavras")
        # Collect every word across all items.
        all_words = self.collect_all_words(news_items)

        print(u"Coletando as principais palavras")
        # The most frequent words become the feature vocabulary.
        top_words = self.identify_top_words(all_words)

        print(u"Embaralhando")
        # Shuffle so the split below is not ordered by source/category.
        random.shuffle(news_items)

        print(u"Gerando conjunto de treinamento")
        # One (features, category) tuple per item.
        featuresets = []
        for item in news_items:
            item_features = item.features(top_words)
            tup = (item_features, item.category)
            featuresets.append(tup)

        # The first 1000 items are held out (see commented test_set below).
        train_set = featuresets[1000:]

        #test_set = featuresets[:1000]

        print('Featuresets tamanho: ' + str(len(featuresets)))

        print("Treinando...")
        self.classifier = nltk.NaiveBayesClassifier.train(train_set)
        print("Treinamento Completo complete")

        if DEBUG:
            # Classify items from a fixed JSON test document and print results.
            arquivo_teste = codecs.open("doc_test_2.json", "r", encoding="utf-8")
            items_news = json.loads(arquivo_teste.read())
            list_test = []
            for item in items_news:
                news = NewsItem(item, self.stop_words)
                list_test.append(news)

            for i in list_test:
                feat = i.features(top_words)
                # NOTE(review): ``self.classificar`` is presumably a wrapper
                # around ``self.classifier.classify`` — confirm in the class.
                print(u"{} - {}".format(i.title, self.classificar(feat)))
            #print(nltk.classify.accuracy(self.classifier, test_set))