Python nltk.classify Examples

The following code examples show how to use the nltk.classify module. They are drawn from open-source Python projects. You can vote up the examples you like or vote down the ones you don't.

Example 1
Project: OpenBottle   Author: xiaozhuchacha   File: util.py    MIT License 7 votes vote down vote up
def demo_sent_subjectivity(text):
    """Print the subjectivity label of a single sentence.

    Tries to reuse a pickled SentimentAnalyzer; if the pickle cannot be
    found, a fresh analyzer is trained with a Naive Bayes classifier.

    :param text: a sentence whose subjectivity has to be classified.
    """
    from nltk.classify import NaiveBayesClassifier
    from nltk.tokenize import regexp

    tokenizer = regexp.WhitespaceTokenizer()
    try:
        analyzer = load('sa_subjectivity.pickle')
    except LookupError:
        print('Cannot find the sentiment analyzer you want to load.')
        print('Training a new one using NaiveBayesClassifier.')
        analyzer = demo_subjectivity(NaiveBayesClassifier.train, True)

    # Lower-case every whitespace-separated token before classifying.
    tokens = [token.lower() for token in tokenizer.tokenize(text)]
    print(analyzer.classify(tokens))
Example 2
Project: razzy-spinner   Author: rafasashi   File: util.py    GNU General Public License v3.0 6 votes vote down vote up
def demo_sent_subjectivity(text):
    """Print the subjectivity label of a single sentence.

    Tries to reuse a pickled SentimentAnalyzer; if the pickle cannot be
    found, a fresh analyzer is trained with a Naive Bayes classifier.

    :param text: a sentence whose subjectivity has to be classified.
    """
    from nltk.classify import NaiveBayesClassifier
    from nltk.tokenize import regexp

    tokenizer = regexp.WhitespaceTokenizer()
    try:
        analyzer = load('sa_subjectivity.pickle')
    except LookupError:
        print('Cannot find the sentiment analyzer you want to load.')
        print('Training a new one using NaiveBayesClassifier.')
        analyzer = demo_subjectivity(NaiveBayesClassifier.train, True)

    # Lower-case every whitespace-separated token before classifying.
    tokens = [token.lower() for token in tokenizer.tokenize(text)]
    print(analyzer.classify(tokens))
Example 3
Project: chattR   Author: patrickstocklin   File: classifiers.py    GNU General Public License v2.0 6 votes vote down vote up
def train(self, *args, **kwargs):
    """Train the classifier with a labeled feature set and return
    the classifier. Takes the same arguments as the wrapped NLTK class.
    This method is implicitly called when calling ``classify`` or
    ``accuracy`` methods and is included only to allow passing in arguments
    to the ``train`` method of the wrapped NLTK class.

    .. versionadded:: 0.6.2

    :rtype: A classifier
    :raises ValueError: if the instance has no usable ``nltk_class``.
    """
    # Resolve the wrapped class's train method *before* calling it, so an
    # AttributeError raised inside the training run itself is not mistaken
    # for a missing/None nltk_class and swallowed into a ValueError.
    try:
        train_method = self.nltk_class.train
    except AttributeError:
        raise ValueError("NLTKClassifier must have a nltk_class"
                         " variable that is not None.")
    self.classifier = train_method(self.train_features, *args, **kwargs)
    return self.classifier
Example 4
Project: OpenBottle   Author: xiaozhuchacha   File: util.py    MIT License 6 votes vote down vote up
def demo_sent_subjectivity(text):
    """Print the subjectivity label of a single sentence.

    Tries to reuse a pickled SentimentAnalyzer; if the pickle cannot be
    found, a fresh analyzer is trained with a Naive Bayes classifier.

    :param text: a sentence whose subjectivity has to be classified.
    """
    from nltk.classify import NaiveBayesClassifier
    from nltk.tokenize import regexp

    tokenizer = regexp.WhitespaceTokenizer()
    try:
        analyzer = load('sa_subjectivity.pickle')
    except LookupError:
        print('Cannot find the sentiment analyzer you want to load.')
        print('Training a new one using NaiveBayesClassifier.')
        analyzer = demo_subjectivity(NaiveBayesClassifier.train, True)

    # Lower-case every whitespace-separated token before classifying.
    tokens = [token.lower() for token in tokenizer.tokenize(text)]
    print(analyzer.classify(tokens))
Example 5
Project: Health-Checker   Author: KriAga   File: util.py    MIT License 6 votes vote down vote up
def demo_sent_subjectivity(text):
    """Print the subjectivity label of a single sentence.

    Tries to reuse a pickled SentimentAnalyzer; if the pickle cannot be
    found, a fresh analyzer is trained with a Naive Bayes classifier.

    :param text: a sentence whose subjectivity has to be classified.
    """
    from nltk.classify import NaiveBayesClassifier
    from nltk.tokenize import regexp

    tokenizer = regexp.WhitespaceTokenizer()
    try:
        analyzer = load('sa_subjectivity.pickle')
    except LookupError:
        print('Cannot find the sentiment analyzer you want to load.')
        print('Training a new one using NaiveBayesClassifier.')
        analyzer = demo_subjectivity(NaiveBayesClassifier.train, True)

    # Lower-case every whitespace-separated token before classifying.
    tokens = [token.lower() for token in tokenizer.tokenize(text)]
    print(analyzer.classify(tokens))
Example 6
Project: Chrono   Author: AmyOlex   File: NB_nltk_classifier.py    GNU General Public License v3.0 6 votes vote down vote up
def build_model(data_file, class_file):
    """Train a Naive Bayes classifier from a CSV of features plus a parallel
    file of integer class labels.

    :param data_file: path to a CSV file; each row becomes a feature dict.
    :param class_file: path to a text file with one integer label per line,
        expected to align row-for-row with ``data_file``.
    :return: tuple ``(classifier, dic, NB_input)`` where ``dic`` is an
        OrderedDict mapping every feature key to the string '0' and
        ``NB_input`` is the list of ``(features, label)`` training pairs.
    """
    ## Import csv files
    data_list = []
    with open(data_file) as file:
        reader = csv.DictReader(file)
        data_list = [row for row in reader]
    class_list = []
    with open(class_file) as f:
        for line in f.readlines():
            class_list.append(int(line.strip()))
    
    ## Create the input for the classifier
    # NOTE(review): if the two files disagree in length, NB_input stays
    # empty and training below will fail — confirm inputs are aligned.
    NB_input = []
    if(len(data_list)==len(class_list)):
        for i in range(0,len(data_list)):
            NB_input.append((data_list[i],class_list[i]))
    
    ## Train the classifier and return it along with the ordered dictionary keys.
    classifier = NaiveBayesClassifier.train(NB_input)
    # Accuracy is measured on the training data itself (resubstitution).
    print('accuracy:', nltk.classify.util.accuracy(classifier, NB_input))
    ## Create the empty orderedDict to pass back for use in the other methods.
    dict_keys = data_list[0].keys()

    dic = OrderedDict(zip(dict_keys, np.repeat('0',len(dict_keys))))
    
    return(classifier, dic, NB_input) 
Example 7
Project: honours_project   Author: JFriel   File: classifiers.py    GNU General Public License v3.0 6 votes vote down vote up
def train(self, *args, **kwargs):
    """Train the classifier with a labeled feature set and return
    the classifier. Takes the same arguments as the wrapped NLTK class.
    This method is implicitly called when calling ``classify`` or
    ``accuracy`` methods and is included only to allow passing in arguments
    to the ``train`` method of the wrapped NLTK class.

    .. versionadded:: 0.6.2

    :rtype: A classifier
    :raises ValueError: if the instance has no usable ``nltk_class``.
    """
    # Resolve the wrapped class's train method *before* calling it, so an
    # AttributeError raised inside the training run itself is not mistaken
    # for a missing/None nltk_class and swallowed into a ValueError.
    try:
        train_method = self.nltk_class.train
    except AttributeError:
        raise ValueError("NLTKClassifier must have a nltk_class"
                         " variable that is not None.")
    self.classifier = train_method(self.train_features, *args, **kwargs)
    return self.classifier
Example 8
Project: honours_project   Author: JFriel   File: classifiers.py    GNU General Public License v3.0 6 votes vote down vote up
def train(self, *args, **kwargs):
    """Train the classifier with a labeled feature set and return
    the classifier. Takes the same arguments as the wrapped NLTK class.
    This method is implicitly called when calling ``classify`` or
    ``accuracy`` methods and is included only to allow passing in arguments
    to the ``train`` method of the wrapped NLTK class.

    .. versionadded:: 0.6.2

    :rtype: A classifier
    :raises ValueError: if the instance has no usable ``nltk_class``.
    """
    # Resolve the wrapped class's train method *before* calling it, so an
    # AttributeError raised inside the training run itself is not mistaken
    # for a missing/None nltk_class and swallowed into a ValueError.
    try:
        train_method = self.nltk_class.train
    except AttributeError:
        raise ValueError("NLTKClassifier must have a nltk_class"
                         " variable that is not None.")
    self.classifier = train_method(self.train_features, *args, **kwargs)
    return self.classifier
Example 9
Project: Youtube-Comments-Analyzer   Author: arghodayah   File: websentiment.py    MIT License 6 votes vote down vote up
def find_sentiment(text):
    """Train a Naive Bayes sentiment classifier from the pos/neg corpora on
    disk and return the predicted label ('positive' or 'negative') for
    *text*."""

    def format_text(raw):
        # Bag-of-words feature dict: every token maps to True.
        return {word: True for word in nltk.word_tokenize(raw)}

    training_set = []
    # Positively labelled corpus, one example per line.
    with open("./pos.txt", encoding='ISO-8859-1') as handle:
        for line in handle:
            training_set.append([format_text(line), 'positive'])
    # Negatively labelled corpus, one example per line.
    with open("./neg.txt", encoding='ISO-8859-1') as handle:
        for line in handle:
            training_set.append([format_text(line), 'negative'])

    classifier = NaiveBayesClassifier.train(training_set)
    return classifier.classify(format_text(text))
Example 10
Project: aop-helpFinder   Author: jecarvaill   File: util.py    GNU General Public License v3.0 6 votes vote down vote up
def demo_sent_subjectivity(text):
    """Print the subjectivity label of a single sentence.

    Tries to reuse a pickled SentimentAnalyzer; if the pickle cannot be
    found, a fresh analyzer is trained with a Naive Bayes classifier.

    :param text: a sentence whose subjectivity has to be classified.
    """
    from nltk.classify import NaiveBayesClassifier
    from nltk.tokenize import regexp

    tokenizer = regexp.WhitespaceTokenizer()
    try:
        analyzer = load('sa_subjectivity.pickle')
    except LookupError:
        print('Cannot find the sentiment analyzer you want to load.')
        print('Training a new one using NaiveBayesClassifier.')
        analyzer = demo_subjectivity(NaiveBayesClassifier.train, True)

    # Lower-case every whitespace-separated token before classifying.
    tokens = [token.lower() for token in tokenizer.tokenize(text)]
    print(analyzer.classify(tokens))
Example 11
Project: serverless-chatbots-workshop   Author: datteswararao   File: util.py    Apache License 2.0 6 votes vote down vote up
def demo_sent_subjectivity(text):
    """Print the subjectivity label of a single sentence.

    Tries to reuse a pickled SentimentAnalyzer; if the pickle cannot be
    found, a fresh analyzer is trained with a Naive Bayes classifier.

    :param text: a sentence whose subjectivity has to be classified.
    """
    from nltk.classify import NaiveBayesClassifier
    from nltk.tokenize import regexp

    tokenizer = regexp.WhitespaceTokenizer()
    try:
        analyzer = load('sa_subjectivity.pickle')
    except LookupError:
        print('Cannot find the sentiment analyzer you want to load.')
        print('Training a new one using NaiveBayesClassifier.')
        analyzer = demo_subjectivity(NaiveBayesClassifier.train, True)

    # Lower-case every whitespace-separated token before classifying.
    tokens = [token.lower() for token in tokenizer.tokenize(text)]
    print(analyzer.classify(tokens))
Example 12
Project: serverless-chatbots-workshop   Author: datteswararao   File: util.py    Apache License 2.0 6 votes vote down vote up
def demo_sent_subjectivity(text):
    """Print the subjectivity label of a single sentence.

    Tries to reuse a pickled SentimentAnalyzer; if the pickle cannot be
    found, a fresh analyzer is trained with a Naive Bayes classifier.

    :param text: a sentence whose subjectivity has to be classified.
    """
    from nltk.classify import NaiveBayesClassifier
    from nltk.tokenize import regexp

    tokenizer = regexp.WhitespaceTokenizer()
    try:
        analyzer = load('sa_subjectivity.pickle')
    except LookupError:
        print('Cannot find the sentiment analyzer you want to load.')
        print('Training a new one using NaiveBayesClassifier.')
        analyzer = demo_subjectivity(NaiveBayesClassifier.train, True)

    # Lower-case every whitespace-separated token before classifying.
    tokens = [token.lower() for token in tokenizer.tokenize(text)]
    print(analyzer.classify(tokens))
Example 13
Project: chattR   Author: patrickstocklin   File: classifiers.py    GNU General Public License v2.0 5 votes vote down vote up
def classify(self, text):
    """Abstract hook: concrete subclasses must supply their own classify."""
    message = 'Must implement a "classify" method.'
    raise NotImplementedError(message)
Example 14
Project: chattR   Author: patrickstocklin   File: classifiers.py    GNU General Public License v2.0 5 votes vote down vote up
def classify(self, text):
    """Classify *text* by extracting its features and delegating to the
    wrapped NLTK classifier.

    :param str text: A string of text.
    """
    # Feature extraction happens here so callers can pass raw strings.
    return self.classifier.classify(self.extract_features(text))
Example 15
Project: chattR   Author: patrickstocklin   File: classifiers.py    GNU General Public License v2.0 5 votes vote down vote up
def accuracy(self, test_set, format=None):
    """Compute the accuracy on a test set.

    :param test_set: A list of tuples of the form ``(text, label)``, or a
        file pointer.
    :param format: If ``test_set`` is a filename, the file format, e.g.
        ``"csv"`` or ``"json"``. If ``None``, will attempt to detect the
        file format.
    """
    # A file-like test set is parsed first; a plain list is used as-is.
    if is_filelike(test_set):
        test_data = self._read_data(test_set)
    else:
        test_data = test_set
    labeled_features = [
        (self.extract_features(text), label) for text, label in test_data
    ]
    return nltk.classify.accuracy(self.classifier, labeled_features)
Example 16
Project: chattR   Author: patrickstocklin   File: classifiers.py    GNU General Public License v2.0 5 votes vote down vote up
def train(self, *args, **kwargs):
    """Train the wrapped positive/unlabeled NLTK classifier and return it.

    This method is implicitly called when calling ``classify`` or
    ``accuracy`` methods and is included only to allow passing in arguments
    to the ``train`` method of the wrapped NLTK class.

    :rtype: A classifier
    """
    # The wrapped class is trained from positive and unlabeled feature
    # sets plus the prior probability of the positive label.
    self.classifier = self.nltk_class.train(
        self.positive_features,
        self.unlabeled_features,
        self.positive_prob_prior,
    )
    return self.classifier
Example 17
Project: eKoNLPy   Author: entelecheia   File: mpck.py    GNU General Public License v3.0 5 votes vote down vote up
def classify(self, tokens, intensity_cutoff=1.3):
    """Score *tokens* with the wrapped classifier and derive polarity and
    intensity statistics.

    :param tokens: iterable of tokens; each becomes a True-valued feature.
    :param intensity_cutoff: minimum winning-side odds ratio required for
        a non-zero polarity.
    :return: dict with 'Polarity', 'Intensity', 'Pos score', 'Neg score'.
    """
    eps = 1e-6
    dist = self.classifier.prob_classify({tok: True for tok in tokens})
    pos_score = dist.prob(self._positive_label)
    neg_score = dist.prob(self._negative_label)
    polarity = pos_score - neg_score
    # Odds ratio of the winning side; eps guards against division by zero.
    if polarity > 0:
        intensity = pos_score / (neg_score + eps)
    else:
        intensity = neg_score / (pos_score + eps)
    # A weak win (odds ratio at or below the cutoff) is reported as neutral.
    if intensity <= intensity_cutoff:
        polarity = 0
    return {'Polarity': polarity, 'Intensity': intensity,
            'Pos score': pos_score, 'Neg score': neg_score}
Example 18
Project: nltk-book-2nd   Author: East196   File: combining-algo-with-a-vote.py    Apache License 2.0 5 votes vote down vote up
def classify(self, features):
    """Return the majority-vote label across all wrapped classifiers."""
    # One vote per underlying classifier; the most common label wins.
    votes = [clf.classify(features) for clf in self._classifiers]
    return mode(votes)
Example 19
Project: nltk-book-2nd   Author: East196   File: combining-algo-with-a-vote.py    Apache License 2.0 5 votes vote down vote up
def confidence(self, features):
    """Return the fraction of wrapped classifiers that agree with the
    winning (majority) label, as a float in (0, 1]."""
    votes = [clf.classify(features) for clf in self._classifiers]
    # Share of votes that went to the most common label.
    winning_votes = votes.count(mode(votes))
    return winning_votes / len(votes)
Example 20
Project: nltk-book-2nd   Author: East196   File: investigating-bias.py    Apache License 2.0 5 votes vote down vote up
def classify(self, features):
    """Return the majority-vote label across all wrapped classifiers."""
    # One vote per underlying classifier; the most common label wins.
    votes = [clf.classify(features) for clf in self._classifiers]
    return mode(votes)
Example 21
Project: nltk-book-2nd   Author: East196   File: investigating-bias.py    Apache License 2.0 5 votes vote down vote up
def confidence(self, features):
    """Return the fraction of wrapped classifiers that agree with the
    winning (majority) label, as a float in (0, 1]."""
    votes = [clf.classify(features) for clf in self._classifiers]
    # Share of votes that went to the most common label.
    winning_votes = votes.count(mode(votes))
    return winning_votes / len(votes)
Example 22
Project: Predicting-US-Presidential-Elections-TwitterData   Author: praveenkanamarlapudi   File: classifier.py    Apache License 2.0 5 votes vote down vote up
def train():
  positive_tweets = read_tweets('positive.txt', 'positive')
  negative_tweets = read_tweets('negative.txt', 'negative')
  print len(positive_tweets)
  print len(negative_tweets)

  #pos_train = positive_tweets[:2000]
  #neg_train = negative_tweets[:2000]
  #pos_test = positive_tweets[2001:3000]
  #neg_test = negative_tweets[2001:3000]
  pos_train = positive_tweets[:len(positive_tweets)*80/100]
  neg_train = negative_tweets[:len(negative_tweets)*80/100]
  pos_test = positive_tweets[len(positive_tweets)*80/100+1:]
  neg_test = negative_tweets[len(positive_tweets)*80/100+1:]

  training_data = pos_train + neg_train
  test_data = pos_test + neg_test

  sentim_analyzer = SentimentAnalyzer()
  all_words_neg = sentim_analyzer.all_words([mark_negation(doc) for doc in training_data])
  #print all_words_neg
  unigram_feats = sentim_analyzer.unigram_word_feats(all_words_neg, min_freq=4)
  #print unigram_feats
  print len(unigram_feats)
  sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)
  training_set = sentim_analyzer.apply_features(training_data)
  test_set = sentim_analyzer.apply_features(test_data)
  print test_set  
  trainer = NaiveBayesClassifier.train
  classifier = sentim_analyzer.train(trainer, training_set)
  for key,value in sorted(sentim_analyzer.evaluate(test_set).items()):
    print('{0}: {1}'.format(key, value))
  print sentim_analyzer.classify(tokenize_sentance('I hate driving car at night'))
  
  return sentim_analyzer 
Example 23
Project: Predicting-US-Presidential-Elections-TwitterData   Author: praveenkanamarlapudi   File: classifier.py    Apache License 2.0 5 votes vote down vote up
def classify(input):
  """Tokenize *input* and label it with the module-level classifier."""
  tokens = tokenize_sentance(input)
  return classifier.classify(tokens)
Example 24
Project: Live-Twitter-Sentiment-Analysis   Author: shreyansh26   File: sentiment_module_1.py    MIT License 5 votes vote down vote up
def classify(self, features):
    """Return the majority-vote label across all wrapped classifiers."""
    # One vote per underlying classifier; the most common label wins.
    votes = [clf.classify(features) for clf in self._classifiers]
    return mode(votes)
Example 25
Project: Live-Twitter-Sentiment-Analysis   Author: shreyansh26   File: sentiment_module_1.py    MIT License 5 votes vote down vote up
def confidence(self, features):
    """Return the fraction of wrapped classifiers that agree with the
    winning (majority) label, as a float in (0, 1]."""
    votes = [clf.classify(features) for clf in self._classifiers]
    # Share of votes that went to the most common label.
    winning_votes = votes.count(mode(votes))
    return winning_votes / len(votes)
Example 26
Project: Live-Twitter-Sentiment-Analysis   Author: shreyansh26   File: sentiment_mod.py    MIT License 5 votes vote down vote up
def classify(self, features):
    """Return the majority-vote label across all wrapped classifiers."""
    # One vote per underlying classifier; the most common label wins.
    votes = [clf.classify(features) for clf in self._classifiers]
    return mode(votes)
Example 27
Project: Live-Twitter-Sentiment-Analysis   Author: shreyansh26   File: sentiment_mod.py    MIT License 5 votes vote down vote up
def confidence(self, features):
    """Return the fraction of wrapped classifiers that agree with the
    winning (majority) label, as a float in (0, 1]."""
    votes = [clf.classify(features) for clf in self._classifiers]
    # Share of votes that went to the most common label.
    winning_votes = votes.count(mode(votes))
    return winning_votes / len(votes)
Example 28
Project: Live-Twitter-Sentiment-Analysis   Author: shreyansh26   File: sentiment_mod.py    MIT License 5 votes vote down vote up
def sentiment(text):
    """Return (label, confidence) for *text* from the voting classifier."""
    feats = find_features(text)
    label = voted_classifier.classify(feats)
    return label, voted_classifier.confidence(feats)



# SAVE ME AS sentiment_mod.py
Example 29
Project: twitter_nltk_volkswagen   Author: edlectrico   File: sentiment_train.py    Apache License 2.0 5 votes vote down vote up
def classify(self, features):
    """Return the majority-vote label across all wrapped classifiers."""
    # One vote per underlying classifier; the most common label wins.
    votes = [clf.classify(features) for clf in self._classifiers]
    return mode(votes)
Example 30
Project: twitter_nltk_volkswagen   Author: edlectrico   File: sentiment_train.py    Apache License 2.0 5 votes vote down vote up
def confidence(self, features):
    """Return the fraction of wrapped classifiers that agree with the
    winning (majority) label, as a float in (0, 1]."""
    votes = [clf.classify(features) for clf in self._classifiers]
    # Share of votes that went to the most common label.
    winning_votes = votes.count(mode(votes))
    return winning_votes / len(votes)
Example 31
Project: twitter_nltk_volkswagen   Author: edlectrico   File: sentiment_mod.py    Apache License 2.0 5 votes vote down vote up
def classify(self, features):
    """Return the majority-vote label across all wrapped classifiers."""
    # One vote per underlying classifier; the most common label wins.
    votes = [clf.classify(features) for clf in self._classifiers]
    return mode(votes)
Example 32
Project: twitter_nltk_volkswagen   Author: edlectrico   File: sentiment_mod.py    Apache License 2.0 5 votes vote down vote up
def confidence(self, features):
    """Return the fraction of wrapped classifiers that agree with the
    winning (majority) label, as a float in (0, 1]."""
    votes = [clf.classify(features) for clf in self._classifiers]
    # Share of votes that went to the most common label.
    winning_votes = votes.count(mode(votes))
    return winning_votes / len(votes)
Example 33
Project: twitter_nltk_volkswagen   Author: edlectrico   File: sentiment_mod.py    Apache License 2.0 5 votes vote down vote up
def sentiment(text):
    """Return (label, confidence) for *text* from the voting classifier."""
    feats = find_features(text)
    label = voted_classifier.classify(feats)
    return label, voted_classifier.confidence(feats)
Example 34
Project: nltk-on-gae   Author: sivu22   File: crf.py    Apache License 2.0 5 votes vote down vote up
def demo(train_size=100, test_size=100, java_home=None, mallet_home=None):
    """Train a MalletCRF tagger on Brown corpus sentences and report accuracy.

    :param train_size: number of 'news' sentences used for training.
    :param test_size: number of 'editorial' sentences used for testing.
    :param java_home: Java location, forwarded to nltk.internals.config_java.
    :param mallet_home: Mallet location, forwarded to
        nltk.classify.mallet.config_mallet.
    :return: the trained MalletCRF model (its on-disk file is deleted).
    """
    from nltk.corpus import brown
    import textwrap

    # Define a very simple feature detector
    def fd(sentence, index):
        word = sentence[index]
        return dict(word=word, suffix=word[-2:], len=len(word))

    # Let nltk know where java & mallet are.
    nltk.internals.config_java(java_home)
    nltk.classify.mallet.config_mallet(mallet_home)

    # Get the training & test corpus.  We simplify the tagset a little:
    # just the first 2 chars.
    def strip(corpus): return [[(w, t[:2]) for (w,t) in sent]
                               for sent in corpus]
    brown_train = strip(brown.tagged_sents(categories='news')[:train_size])
    brown_test = strip(brown.tagged_sents(categories='editorial')[:test_size])

    crf = MalletCRF.train(fd, brown_train, #'/tmp/crf-model',
                          transduction_type='VITERBI')
    # Tag the sixth test sentence as a sample, then score on the whole set.
    sample_output = crf.tag([w for (w,t) in brown_test[5]])
    acc = nltk.tag.accuracy(crf, brown_test)
    print('\nAccuracy: %.1f%%' % (acc*100))
    print('Sample output:')
    print(textwrap.fill(' '.join('%s/%s' % w for w in sample_output),
                        initial_indent='  ', subsequent_indent='  ')+'\n')

    # Clean up
    # The trained model file is removed; only the in-memory object survives.
    print('Clean-up: deleting', crf.filename)
    os.remove(crf.filename)

    return crf 
Example 35
Project: roorkee-bot   Author: abhishekjiitr   File: roorkee-bot.py    GNU General Public License v3.0 5 votes vote down vote up
def classify(self, features):
    """Return the most frequent label among the wrapped classifiers' votes."""
    votes = [clf.classify(features) for clf in self.classifiers]
    # most_common() is a module-level helper that picks the top vote.
    return most_common(votes)
Example 36
Project: roorkee-bot   Author: abhishekjiitr   File: roorkee-bot.py    GNU General Public License v3.0 5 votes vote down vote up
def confidence(self, features):
    """Return the fraction of classifiers agreeing with the modal vote."""
    votes = [clf.classify(features) for clf in self.classifiers]
    agreeing = votes.count(mode(votes))
    return agreeing / len(votes)
Example 37
Project: roorkee-bot   Author: abhishekjiitr   File: roorkee-bot.py    GNU General Public License v3.0 5 votes vote down vote up
def sent2vec(que):
    """Encode a question as a feature vector: focus-word indicator entries
    plus a one-hot question-type marker at indices 0-5."""
    original = que
    focus_words = getFocusWords(que)

    vec = [0] * g_index
    for word in focus_words:
        if word in d_index:
            vec[d_index[word]] = 1
            # Boost words that fail check() — presumably rarer/more
            # informative tokens; TODO confirm check()'s semantics.
            if not check(word):
                vec[d_index[word]] *= 1.5

    processed = feature(process(original))
    qtype = voted_classifier.classify(processed)
    # One-hot encode the predicted question type; unknown types are ignored.
    type_slots = {"ABBR": 0, "ENTY": 1, "HUM": 2, "NUM": 3, "DESC": 4, "LOC": 5}
    if qtype in type_slots:
        vec[type_slots[qtype]] = 1
    return vec
Example 38
Project: NLP-application-in-indian-Judiciary   Author: Teamcoders007   File: main.py    GNU General Public License v3.0 5 votes vote down vote up
def classify(self, feature):
    """Return the majority label across the wrapped classifiers."""
    ballots = [clf.classify(feature) for clf in self._classifier_]
    return mode(ballots)
Example 39
Project: NLP-application-in-indian-Judiciary   Author: Teamcoders007   File: main.py    GNU General Public License v3.0 5 votes vote down vote up
def confidence(self, feature):
    """Return the agreement ratio for the modal vote.

    NOTE(review): this reads ``self._classifier`` while the sibling
    ``classify`` reads ``self._classifier_`` — confirm which attribute
    the class actually defines.
    """
    ballots = [clf.classify(feature) for clf in self._classifier]
    top = ballots.count(mode(ballots))
    return top / len(ballots)
Example 40
Project: news-crawler   Author: nolram   File: nltk_classificador.py    MIT License 5 votes vote down vote up
def __init__(self):
        # Connect to the local MongoDB instance (default host/port).
        self.client = MongoClient()
        # Database and collection holding the crawled news articles.
        self.db = self.client.articles_news
        self.news = self.db.articles
        # Stop words loaded via the module-level helper.
        self.stop_words = _load_stop_words()
        # Kick off classification immediately on construction.
        self.classify() 
Example 41
Project: news-crawler   Author: nolram   File: nltk_classificador.py    MIT License 5 votes vote down vote up
def classificar(self, word):
    """Return the wrapped classifier's label for *word*."""
    predicted = self.classifier.classify(word)
    return predicted
Example 42
Project: luscan-devel   Author: blackye   File: crf.py    GNU General Public License v2.0 5 votes vote down vote up
def demo(train_size=100, test_size=100, java_home=None, mallet_home=None):
    """Train a MalletCRF tagger on Brown corpus sentences and report accuracy.

    (Python 2 source — note the print statements.)

    :param train_size: number of 'news' sentences used for training.
    :param test_size: number of 'editorial' sentences used for testing.
    :param java_home: Java location, forwarded to nltk.internals.config_java.
    :param mallet_home: Mallet location, forwarded to
        nltk.classify.mallet.config_mallet.
    :return: the trained MalletCRF model (its on-disk file is deleted).
    """
    from nltk.corpus import brown
    import textwrap

    # Define a very simple feature detector
    def fd(sentence, index):
        word = sentence[index]
        return dict(word=word, suffix=word[-2:], len=len(word))

    # Let nltk know where java & mallet are.
    nltk.internals.config_java(java_home)
    nltk.classify.mallet.config_mallet(mallet_home)

    # Get the training & test corpus.  We simplify the tagset a little:
    # just the first 2 chars.
    def strip(corpus): return [[(w, t[:2]) for (w,t) in sent]
                               for sent in corpus]
    brown_train = strip(brown.tagged_sents(categories='news')[:train_size])
    brown_test = strip(brown.tagged_sents(categories='editorial')[:test_size])

    crf = MalletCRF.train(fd, brown_train, #'/tmp/crf-model',
                          transduction_type='VITERBI')
    # Tag the sixth test sentence as a sample, then score on the whole set.
    sample_output = crf.tag([w for (w,t) in brown_test[5]])
    acc = nltk.tag.accuracy(crf, brown_test)
    print '\nAccuracy: %.1f%%' % (acc*100)
    print 'Sample output:'
    print textwrap.fill(' '.join('%s/%s' % w for w in sample_output),
                        initial_indent='  ', subsequent_indent='  ')+'\n'

    # Clean up
    # The trained model file is removed; only the in-memory object survives.
    print 'Clean-up: deleting', crf.filename
    os.remove(crf.filename)

    return crf 
Example 43
Project: nltk_sentiment_analysis   Author: edlectrico   File: sentiment_train.py    GNU General Public License v2.0 5 votes vote down vote up
def classify(self, features):
    """Return the majority-vote label across all wrapped classifiers."""
    # One vote per underlying classifier; the most common label wins.
    votes = [clf.classify(features) for clf in self._classifiers]
    return mode(votes)
Example 44
Project: nltk_sentiment_analysis   Author: edlectrico   File: sentiment_train.py    GNU General Public License v2.0 5 votes vote down vote up
def confidence(self, features):
    """Return the fraction of wrapped classifiers that agree with the
    winning (majority) label, as a float in (0, 1]."""
    votes = [clf.classify(features) for clf in self._classifiers]
    # Share of votes that went to the most common label.
    winning_votes = votes.count(mode(votes))
    return winning_votes / len(votes)
Example 45
Project: nltk_sentiment_analysis   Author: edlectrico   File: sentiment_mod.py    GNU General Public License v2.0 5 votes vote down vote up
def classify(self, features):
    """Return the majority-vote label across all wrapped classifiers."""
    # One vote per underlying classifier; the most common label wins.
    votes = [clf.classify(features) for clf in self._classifiers]
    return mode(votes)
Example 46
Project: nltk_sentiment_analysis   Author: edlectrico   File: sentiment_mod.py    GNU General Public License v2.0 5 votes vote down vote up
def confidence(self, features):
    """Return the fraction of wrapped classifiers that agree with the
    winning (majority) label, as a float in (0, 1]."""
    votes = [clf.classify(features) for clf in self._classifiers]
    # Share of votes that went to the most common label.
    winning_votes = votes.count(mode(votes))
    return winning_votes / len(votes)
Example 47
Project: nltk_sentiment_analysis   Author: edlectrico   File: sentiment_mod.py    GNU General Public License v2.0 5 votes vote down vote up
def sentiment(text):
    """Return (label, confidence) for *text* from the voting classifier."""
    feats = find_features(text)
    label = voted_classifier.classify(feats)
    return label, voted_classifier.confidence(feats)
Example 48
Project: nltk_sentiment_analysis   Author: edlectrico   File: nltk_movies_classification.py    GNU General Public License v2.0 5 votes vote down vote up
def confidence(self, features):
    """Return the fraction of wrapped classifiers that agree with the
    winning (majority) label, as a float in (0, 1]."""
    votes = [clf.classify(features) for clf in self._classifiers]
    # Share of votes that went to the most common label.
    winning_votes = votes.count(mode(votes))
    return winning_votes / len(votes)
Example 49
Project: nlp-services   Author: singnet   File: train_mod.py    MIT License 5 votes vote down vote up
def classify(self, features):
    """Return the majority-vote label across all wrapped classifiers."""
    # One vote per underlying classifier; the most common label wins.
    votes = [clf.classify(features) for clf in self._classifiers]
    return mode(votes)
Example 50
Project: nlp-services   Author: singnet   File: train_mod.py    MIT License 5 votes vote down vote up
def confidence(self, features):
    """Return the fraction of wrapped classifiers that agree with the
    winning (majority) label, as a float in (0, 1]."""
    votes = [clf.classify(features) for clf in self._classifiers]
    # Share of votes that went to the most common label.
    winning_votes = votes.count(mode(votes))
    return winning_votes / len(votes)


# Fetching trained dataset 
Example 51
Project: honours_project   Author: JFriel   File: classifiers.py    GNU General Public License v3.0 5 votes vote down vote up
def classify(self, text):
    """Abstract hook: concrete subclasses must supply their own classify."""
    message = 'Must implement a "classify" method.'
    raise NotImplementedError(message)
Example 52
Project: honours_project   Author: JFriel   File: classifiers.py    GNU General Public License v3.0 5 votes vote down vote up
def classify(self, text):
    """Classify *text* by extracting its features and delegating to the
    wrapped NLTK classifier.

    :param str text: A string of text.
    """
    # Feature extraction happens here so callers can pass raw strings.
    return self.classifier.classify(self.extract_features(text))
Example 53
Project: honours_project   Author: JFriel   File: classifiers.py    GNU General Public License v3.0 5 votes vote down vote up
def accuracy(self, test_set, format=None):
    """Compute the accuracy on a test set.

    :param test_set: A list of tuples of the form ``(text, label)``, or a
        file pointer.
    :param format: If ``test_set`` is a filename, the file format, e.g.
        ``"csv"`` or ``"json"``. If ``None``, will attempt to detect the
        file format.
    """
    # A file-like test set is parsed first; a plain list is used as-is.
    if is_filelike(test_set):
        test_data = self._read_data(test_set)
    else:
        test_data = test_set
    labeled_features = [
        (self.extract_features(text), label) for text, label in test_data
    ]
    return nltk.classify.accuracy(self.classifier, labeled_features)
Example 54
Project: honours_project   Author: JFriel   File: classifiers.py    GNU General Public License v3.0 5 votes vote down vote up
def train(self, *args, **kwargs):
    """Train the wrapped positive/unlabeled NLTK classifier and return it.

    This method is implicitly called when calling ``classify`` or
    ``accuracy`` methods and is included only to allow passing in arguments
    to the ``train`` method of the wrapped NLTK class.

    :rtype: A classifier
    """
    # The wrapped class is trained from positive and unlabeled feature
    # sets plus the prior probability of the positive label.
    self.classifier = self.nltk_class.train(
        self.positive_features,
        self.unlabeled_features,
        self.positive_prob_prior,
    )
    return self.classifier
Example 55
Project: honours_project   Author: JFriel   File: classifiers.py    GNU General Public License v3.0 5 votes vote down vote up
def classify(self, text):
    """Classify a string of text.

    Abstract hook: concrete classifier subclasses must override this.
    """
    raise NotImplementedError('Must implement a "classify" method.')
Example 56
Project: honours_project   Author: JFriel   File: classifiers.py    GNU General Public License v3.0 5 votes vote down vote up
def classify(self, text):
    """Return the label the wrapped classifier predicts for ``text``.

    :param str text: A string of text.
    """
    # Feature extraction and prediction in one step.
    return self.classifier.classify(self.extract_features(text))
Example 57
Project: honours_project   Author: JFriel   File: classifiers.py    GNU General Public License v3.0 5 votes vote down vote up
def accuracy(self, test_set, format=None):
    """Evaluate the classifier's accuracy on labeled examples.

    :param test_set: Either a file-like object of labeled data, or a list
        of ``(text, label)`` tuples.
    :param format: File format hint (e.g. ``"csv"`` or ``"json"``) when
        ``test_set`` is a file; autodetected when ``None``.

    .. note:: NOTE(review): ``format`` is not passed to ``_read_data``
        here — verify against the original project.
    """
    # Accept either a file of data or an in-memory list of pairs.
    data = self._read_data(test_set) if is_filelike(test_set) else test_set
    featurized = [(self.extract_features(doc), label) for doc, label in data]
    return nltk.classify.accuracy(self.classifier, featurized)
Example 58
Project: honours_project   Author: JFriel   File: classifiers.py    GNU General Public License v3.0 5 votes vote down vote up
def train(self, *args, **kwargs):
    """Fit the underlying NLTK classifier on the stored feature sets.

    Invoked implicitly by ``classify``/``accuracy``; exposed so callers
    can train explicitly.

    .. note:: NOTE(review): ``*args``/``**kwargs`` are ignored rather
        than forwarded — confirm this matches the intended contract.

    :rtype: A classifier
    """
    self.classifier = self.nltk_class.train(
        self.positive_features, self.unlabeled_features, self.positive_prob_prior
    )
    return self.classifier
Example 59
Project: Youtube-Comments-Analyzer   Author: arghodayah   File: sentiment.py    MIT License 5 votes vote down vote up
def find_sentiment(text):
    """Classify the sentiment of one or more texts with Naive Bayes.

    A classifier is trained from scratch on ``./pos.txt`` and ``./neg.txt``
    on every call.

    :param text: A list of strings.
    :return: For a single-element list, the predicted label
        (``'positive'`` or ``'negative'``). For a multi-element list, a
        ``(positive_count, negative_count)`` tuple. An empty list returns
        ``None`` (preserved from the original behavior).
    """
    def format_text(text):
        # Bag-of-words feature dict in the shape NLTK classifiers expect.
        return {word: True for word in nltk.word_tokenize(text)}

    def load_labeled(path, label):
        # One training example per line of the corpus file.
        examples = []
        with open(path, encoding='ISO-8859-1') as f:
            for line in f:
                examples.append([format_text(line), label])
        return examples

    # Train on the full positive + negative corpora.
    # NOTE(review): retraining on every call is expensive; consider caching.
    training_set = (load_labeled("./pos.txt", 'positive')
                    + load_labeled("./neg.txt", 'negative'))
    classifier = NaiveBayesClassifier.train(training_set)

    # Single element: return its predicted label directly.
    if len(text) == 1:
        return classifier.classify(format_text(text[0]))
    # Multiple elements: tally positive/negative votes.
    elif len(text) > 1:
        pos = 0
        neg = 0
        for item in text:
            if classifier.classify(format_text(item)) == 'positive':
                pos = pos + 1
            else:
                neg = neg + 1
            # Progress indicator, overwritten in place on one line.
            print(str(pos + neg) + "/" + str(len(text)), end="\r")
        return pos, neg
Example 60
Project: Youtube-Comments-Analyzer   Author: arghodayah   File: sentiment.py    MIT License 5 votes vote down vote up
def find_scores(train_ratio=0.80):
    """Train a Naive Bayes sentiment model and report evaluation scores.

    Loads ``./pos.txt`` and ``./neg.txt``, splits each class with the same
    ratio so class balance is preserved, trains on the first portion and
    evaluates on the remainder.

    :param float train_ratio: Fraction of each corpus used for training
        (default 0.80, matching the original fixed 80/20 split).
    :return: ``(accuracy, positive_f_measure, negative_f_measure)``.
    """
    def format_text(text):
        # Bag-of-words feature dict in the shape NLTK classifiers expect.
        return {word: True for word in nltk.word_tokenize(text)}

    def load_labeled(path, label):
        # One labeled example per line of the corpus file.
        with open(path, encoding='ISO-8859-1') as f:
            return [[format_text(line), label] for line in f]

    pos = load_labeled("./pos.txt", 'positive')
    neg = load_labeled("./neg.txt", 'negative')

    # Split each class separately so the train/test ratio holds per class.
    pos_cut = int(train_ratio * len(pos))
    neg_cut = int(train_ratio * len(neg))
    training_set = pos[:pos_cut] + neg[:neg_cut]
    test_set = pos[pos_cut:] + neg[neg_cut:]

    classifier = NaiveBayesClassifier.train(training_set)

    # Reference vs. predicted index sets per label, for F-measure.
    trueset = collections.defaultdict(set)
    testset = collections.defaultdict(set)
    for i, (features, label) in enumerate(test_set):
        trueset[label].add(i)
        testset[classifier.classify(features)].add(i)

    return (accuracy(classifier, test_set),
            f_measure(trueset['positive'], testset['positive']),
            f_measure(trueset['negative'], testset['negative']))
Example 61
Project: twitter-sentiment   Author: words-sdsc   File: SentMod.py    Apache License 2.0 5 votes vote down vote up
def classify(self, features):
    """Return the majority-vote label across the wrapped classifiers."""
    ballots = [voter.classify(features) for voter in self._classifiers]
    return mode(ballots)
Example 62
Project: twitter-sentiment   Author: words-sdsc   File: SentMod.py    Apache License 2.0 5 votes vote down vote up
def confidence(self, features):
    """Return the fraction of wrapped classifiers agreeing with the winner.

    Confidence is ``winning_votes / total_votes``, a float in (0, 1].
    """
    ballots = [voter.classify(features) for voter in self._classifiers]
    winner = mode(ballots)
    return ballots.count(winner) / len(ballots)
Example 63
Project: wazen   Author: anati89   File: classifiers.py    MIT License 5 votes vote down vote up
def nb_classifier(train_data, train_target, test_data, test_target):
    """
    Train and test using Naive Bayes classifier.

    Reports 10-fold cross-validation accuracy on the training data, then
    trains a final model on all training data, reports its accuracy on the
    test data, and pickles it to ``naive.pkl``.

    :param train_data: training documents
    :param train_target: training labels
    :param test_data: test documents
    :param test_target: test labels
    :return: None (results are printed; the model is dumped to disk)
    """
    train_nltk_input = format_nltk_inputs(train_data, train_target)
    kf = KFold(n_splits=10)
    # Convert once instead of twice per fold (was rebuilt every iteration).
    train_array = np.array(train_nltk_input)
    acc_list = []
    for train_indices, test_indices in kf.split(train_nltk_input):
        fold_train = train_array[train_indices]
        fold_test = train_array[test_indices]
        clf = nltk.NaiveBayesClassifier.train(fold_train)
        acc_list.append(nltk.classify.accuracy(clf, fold_test))
    # Average over the actual number of folds instead of a hard-coded 10,
    # and avoid shadowing the builtin `sum`.
    average = statistics.mean(acc_list)
    std = statistics.stdev(acc_list)
    print(f'NLTK Accuracy on train data using K-fold: {average} std: {std}')
    test_nltk_input = format_nltk_inputs(test_data, test_target)
    model = nltk.NaiveBayesClassifier.train(train_nltk_input)
    acc = nltk.classify.accuracy(model, test_nltk_input)
    print(f'NLTK accuracy on testing data {acc}')
    # Persist the final model.
    dump_model(model, 'naive.pkl')
Example 64
Project: news-crawler   Author: nolram   File: nltk_classificador.py    MIT License 4 votes vote down vote up
def classify(self):
        """Train a Naive Bayes classifier over collected news items.

        Pipeline (console messages are in Portuguese): collect news,
        gather all words, identify the top words as the feature
        vocabulary, shuffle the items, build (features, category) pairs,
        and train ``self.classifier`` on all but the first 1000 items.
        When ``DEBUG`` is set, items from ``doc_test_2.json`` are
        classified and printed.
        """
        print(u"Coletando as Notícias")
        # Fetch the raw news items used for training.
        news_items = self.collect_news()

        print(u"Coletando todas as palavras")
        # Collect every word across all items.
        all_words = self.collect_all_words(news_items)

        print(u"Coletando as principais palavras")
        # The most frequent words become the feature vocabulary.
        top_words = self.identify_top_words(all_words)

        print(u"Embaralhando")
        # Shuffle so the split below is not ordered by source/category.
        random.shuffle(news_items)

        print(u"Gerando conjunto de treinamento")
        # One (features, category) tuple per item.
        featuresets = []
        for item in news_items:
            item_features = item.features(top_words)
            tup = (item_features, item.category)
            featuresets.append(tup)

        # The first 1000 items are held out (see commented test_set below).
        train_set = featuresets[1000:]

        #test_set = featuresets[:1000]

        print('Featuresets tamanho: ' + str(len(featuresets)))

        print("Treinando...")
        self.classifier = nltk.NaiveBayesClassifier.train(train_set)
        print("Treinamento Completo complete")

        if DEBUG:
            # Classify items from a fixed JSON test document and print results.
            arquivo_teste = codecs.open("doc_test_2.json", "r", encoding="utf-8")
            items_news = json.loads(arquivo_teste.read())
            list_test = []
            for item in items_news:
                news = NewsItem(item, self.stop_words)
                list_test.append(news)

            for i in list_test:
                feat = i.features(top_words)
                # NOTE(review): ``self.classificar`` is presumably a wrapper
                # around ``self.classifier.classify`` — confirm in the class.
                print(u"{} - {}".format(i.title, self.classificar(feat)))
            #print(nltk.classify.accuracy(self.classifier, test_set))