Python nltk.ConditionalFreqDist() Examples
The following are 6 code examples showing how to use nltk.ConditionalFreqDist(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.
You may check out the related API usage on the sidebar.
You may also want to check out all available functions/classes of the module nltk, or try the search function.
Example 1
Project: nltk_teach Author: nltk File: categories.py License: Apache License 2.0 | 6 votes |
def build_word_associations():
    """Count forward co-occurrences of content words in the Brown corpus.

    Each sentence is lower-cased and stripped of English stopwords. For every
    remaining token, each of the (up to) four tokens that follow it in the
    filtered sentence is counted as an association, provided both tags start
    with the same character.

    Returns:
        nltk.ConditionalFreqDist: conditions are lower-cased tokens; the
        samples under each condition are the lower-cased tokens counted
        after it.
    """
    cfd = nltk.ConditionalFreqDist()

    # The corpus reader returns a list; a set makes each membership test O(1).
    stopwords = set(nltk.corpus.stopwords.words('english'))

    for tagged_sentence in nltk.corpus.brown.tagged_sents():
        # Filter once up front so stopwords are never used as conditions and
        # never occupy a slot in the look-ahead window.
        lowered = ((token.lower(), tag) for (token, tag) in tagged_sentence)
        content = [(token, tag) for (token, tag) in lowered
                   if token not in stopwords]

        for index, (token, tag) in enumerate(content):
            for window_token, window_tag in content[index + 1:index + 5]:
                # Compare by value: identity (`is`) on strings only works
                # when the interpreter happens to share the objects.
                if window_tag[0] == tag[0]:
                    # Item assignment works across NLTK versions; the old
                    # FreqDist.inc() method no longer exists in NLTK 3.
                    cfd[token][window_token] += 1
    return cfd
Example 2
Project: yenlp Author: stathius File: sentiwordnet.py License: GNU General Public License v3.0 | 6 votes |
def word_sense_cdf(word, context, wn_pos):
    """Pick the WordNet sense whose definition overlaps most with the context.

    Word sense disambiguation in terms of matching word frequency between the
    context and each sense's definition.
    Adapted from www.slideshare.net/faigg/tutotial-of-sentiment-analysis

    Args:
        word: the word to disambiguate.
        context: container tested with ``in`` for each whitespace-separated
            word of a sense definition.
        wn_pos: part-of-speech argument passed to ``wordnet.synsets``.

    Returns:
        The sense with the largest number of definition words found in
        ``context`` (the first such sense on ties, so the first sense when
        nothing overlaps), or None if the word has no senses.
    """
    senses = wordnet.synsets(word, wn_pos)
    if not senses:
        return None

    cfd = nltk.ConditionalFreqDist(
        (sense, def_word)
        for sense in senses
        for def_word in sense.definition().split()
        if def_word in context
    )

    # Rank by the total overlap count. FreqDist.max() would return the most
    # frequent *word* (and raise on an empty distribution), so comparing its
    # results orders senses alphabetically rather than by overlap. N() is 0
    # for a sense with no overlap, and max() keeps the earliest sense on ties.
    return max(senses, key=lambda sense: cfd[sense].N())
Example 3
Project: V1EngineeringInc-Docs Author: V1EngineeringInc File: test_cfd_mutation.py License: Creative Commons Attribution Share Alike 4.0 International | 6 votes |
def test_increment(self):
    """Verify that a ConditionalFreqDist can still be mutated by indexing."""
    sentence = "cow cat mouse cat tiger"
    by_length = ConditionalFreqDist()

    # Use each word's length as its condition.
    for token in tokenize.word_tokenize(sentence):
        by_length[len(token)][token] += 1

    self.assertEqual(by_length.conditions(), [3, 5])

    # Incrementing a previously unseen key must still be possible.
    by_length[2]['hi'] += 1

    # The new condition has been added.
    self.assertEqual(set(by_length.conditions()), {3, 5, 2})
    # The key's frequency went from 0 (unseen) to 1.
    self.assertEqual(by_length[2]['hi'], 1)
Example 4
Project: deep_disfluency Author: clp-research File: hmm.py License: MIT License | 5 votes |
def train_markov_model_from_constraint_matrix(self, csv_path, mm_path, delim="\t"):
    """Train the tag model from a count matrix stored as delimited text.

    The first row of the file names the range states (its first cell is
    ignored). Every following row starts with a domain state, followed by one
    integer count per range state; blank cells are skipped. Each positive
    count contributes that many (domain, range_state) pairs to a conditional
    frequency distribution.

    Side effects: sets ``self.cfd_tags``, ``self.cpd_tags`` and
    ``self.tag_set``, prints summaries, pickles ``self.cfd_tags`` to
    ``mm_path`` and finally calls ``self.viterbi_init()``.

    Args:
        csv_path: path of the delimited count matrix to read.
        mm_path: path the pickled frequency distribution is written to.
        delim: cell delimiter used in the matrix file.

    Raises:
        IndexError: if the file contains no rows.
        ValueError: if a non-blank cell is not an integer.
    """
    # Close the file promptly, and strip the line ending before splitting so
    # the last header cell is not stored as a state name ending in "\n".
    with open(csv_path) as csv_file:
        table = [line.rstrip("\r\n").split(delim) for line in csv_file]

    range_states = table.pop(0)[1:]
    tags = []
    for row in table:
        domain = row[0]
        for i, cell in enumerate(row[1:]):
            cell = cell.replace(" ", "")
            if cell == '':
                continue
            # Repeating a list by a non-positive count yields nothing, so
            # only positive counts add pairs.
            tags.extend([(domain, range_states[i])] * int(cell))

    self.cfd_tags = nltk.ConditionalFreqDist(tags)
    print("cfd trained, counts:")
    self.cfd_tags.tabulate()
    print("test:")
    print(tabulate_cfd(self.cfd_tags))

    # Save this new cfd for later use.
    with open(mm_path, "wb") as model_file:
        pickle.dump(self.cfd_tags, model_file)

    # Initialize the cpd.
    self.cpd_tags = nltk.ConditionalProbDist(self.cfd_tags, nltk.MLEProbDist)
    print(tabulate_cfd(self.cpd_tags))

    # Every condition plus every outcome seen under any condition. Built as
    # a set directly because dict views cannot be concatenated with `+`.
    tag_set = set(self.cfd_tags.keys())
    for outcomes in self.cfd_tags.values():
        tag_set.update(outcomes.keys())
    self.tag_set = tag_set

    self.viterbi_init()  # initialize viterbi
Example 5
Project: V1EngineeringInc-Docs Author: V1EngineeringInc File: test_cfd_mutation.py License: Creative Commons Attribution Share Alike 4.0 International | 5 votes |
def test_tabulate(self):
    """Tabulating a nonexistent condition must not add it to the cfd."""
    empty = ConditionalFreqDist()
    self.assertEqual(empty.conditions(), [])
    try:
        empty.tabulate(conditions="BUG")  # nonexistent keys shouldn't be added
    except Exception:
        # Whether tabulate() raises is not under test; only mutation is.
        # Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate.
        pass
    self.assertEqual(empty.conditions(), [])
Example 6
Project: V1EngineeringInc-Docs Author: V1EngineeringInc File: test_cfd_mutation.py License: Creative Commons Attribution Share Alike 4.0 International | 5 votes |
def test_plot(self):
    """Plotting a nonexistent condition must not add it to the cfd."""
    empty = ConditionalFreqDist()
    self.assertEqual(empty.conditions(), [])
    try:
        empty.plot(conditions=["BUG"])  # nonexistent keys shouldn't be added
    except Exception:
        # Whether plot() raises is not under test; only mutation is.
        # Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate.
        pass
    self.assertEqual(empty.conditions(), [])