Python nltk.corpus.wordnet.VERB Examples
The following are 30 code examples of nltk.corpus.wordnet.VERB.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions and classes of the module nltk.corpus.wordnet, or try the search function.
Example #1
Source File: normalization.py From text-analytics-with-python with Apache License 2.0 | 6 votes |
def pos_tag_text(text):
    """POS-tag *text* and return (lowercased word, WordNet tag) pairs.

    Penn tags outside the J/V/N/R families map to None.
    """
    def penn_to_wn_tags(pos_tag):
        # Dispatch on the first letter of the Penn Treebank tag.
        return {'J': wn.ADJ, 'V': wn.VERB, 'N': wn.NOUN, 'R': wn.ADV}.get(pos_tag[:1])

    return [(word.lower(), penn_to_wn_tags(pos_tag)) for word, pos_tag in tag(text)]
Example #2
Source File: _common.py From tmtoolkit with Apache License 2.0 | 6 votes |
def pos_tag_convert_penn_to_wn(tag):
    """
    Convert POS tag from Penn tagset to WordNet tagset.

    :param tag: a tag from Penn tagset
    :return: a tag from WordNet tagset or None if no corresponding tag
             could be found
    """
    from nltk.corpus import wordnet as wn

    # WordNet POS -> the Penn tags it covers (disjoint sets, so the
    # iteration order cannot change the result).
    penn_by_wn = {
        wn.ADJ: {'JJ', 'JJR', 'JJS'},
        wn.ADV: {'RB', 'RBR', 'RBS'},
        wn.NOUN: {'NN', 'NNS', 'NNP', 'NNPS'},
        wn.VERB: {'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ'},
    }
    for wn_tag, penn_tags in penn_by_wn.items():
        if tag in penn_tags:
            return wn_tag
    return None
Example #3
Source File: normalization.py From text-analytics-with-python with Apache License 2.0 | 6 votes |
def pos_tag_text(text):
    """Tag *text*, lowercasing each word and converting its Penn tag to
    the WordNet equivalent (None for unmapped tags)."""
    def penn_to_wn_tags(pos_tag):
        # First letter of the Penn tag selects the WordNet POS.
        return {'J': wn.ADJ, 'V': wn.VERB, 'N': wn.NOUN, 'R': wn.ADV}.get(pos_tag[:1])

    return [(word.lower(), penn_to_wn_tags(pos_tag)) for word, pos_tag in tag(text)]
Example #4
Source File: normalization.py From text-analytics-with-python with Apache License 2.0 | 6 votes |
def pos_tag_text(text):
    """Return [(lowercased word, WordNet POS), ...] for the tokens of
    *text*; tags outside J/V/N/R become None."""
    def penn_to_wn_tags(pos_tag):
        # Map the Penn tag's leading letter to a WordNet POS constant.
        return {'J': wn.ADJ, 'V': wn.VERB, 'N': wn.NOUN, 'R': wn.ADV}.get(pos_tag[:1])

    return [(word.lower(), penn_to_wn_tags(pos_tag)) for word, pos_tag in tag(text)]
Example #5
Source File: normalization.py From text-analytics-with-python with Apache License 2.0 | 6 votes |
def pos_tag_text(text):
    """POS-tag *text*; each token becomes (word.lower(), WordNet tag),
    with None for Penn tags that have no WordNet counterpart."""
    def penn_to_wn_tags(pos_tag):
        # Penn initial -> WordNet POS; .get() yields None otherwise.
        return {'J': wn.ADJ, 'V': wn.VERB, 'N': wn.NOUN, 'R': wn.ADV}.get(pos_tag[:1])

    return [(word.lower(), penn_to_wn_tags(pos_tag)) for word, pos_tag in tag(text)]
Example #6
Source File: wn_persistent_api.py From combine-FEVER-NSMN with MIT License | 6 votes |
def convert_to_wn_pos(pos):
    """Map a Penn tag prefix to a WordNet POS constant ('' if none)."""
    for prefix, wn_pos in (("J", wn.ADJ), ("V", wn.VERB),
                           ("N", wn.NOUN), ("R", wn.ADV)):
        if pos.startswith(prefix):
            return wn_pos
    return ""
Example #7
Source File: deploy.py From Election-Meddling with MIT License | 6 votes |
def get_wordnet_pos(self, treebank_tag):
    """Return the WordNet POS (a, n, r, v) compliant with WordNet
    lemmatization for a Penn Treebank tag; defaults to NOUN."""
    for prefix, pos in (('J', wordnet.ADJ), ('V', wordnet.VERB),
                        ('N', wordnet.NOUN), ('R', wordnet.ADV)):
        if treebank_tag.startswith(prefix):
            return pos
    return wordnet.NOUN
Example #8
Source File: transformer.py From atap with Apache License 2.0 | 5 votes |
def lemmatize(self, token, pos_tag):
    """Lemmatize *token* using the WordNet POS derived from the first
    letter of its Penn tag (NOUN when unrecognized)."""
    wn_tag_by_initial = {'N': wn.NOUN, 'V': wn.VERB, 'R': wn.ADV, 'J': wn.ADJ}
    tag = wn_tag_by_initial.get(pos_tag[0], wn.NOUN)
    return self.lemmatizer.lemmatize(token, tag)
Example #9
Source File: agglomerative.py From atap with Apache License 2.0 | 5 votes |
def wnpos(tag):
    """Return the WordNet POS tag from the Penn Treebank tag
    (NOUN when the tag's first letter is unrecognized)."""
    initial = tag[0]
    if initial == 'N':
        return wn.NOUN
    if initial == 'V':
        return wn.VERB
    if initial == 'R':
        return wn.ADV
    if initial == 'J':
        return wn.ADJ
    return wn.NOUN
Example #10
Source File: __init__.py From wordai with MIT License | 5 votes |
def _sentence_to_mongo(typ, items):
    """Tokenize and lemmatize each translation pair in *items* and
    persist them as Sentence documents in batches.

    :param typ: sentence type stored on each Sentence document
    :param items: iterable of translation objects exposing
                  ``getsource()`` (English) and ``gettarget()`` (Chinese)

    Bug fixed: the original only inserted when a batch exceeded 50, so
    the final partial batch of sentences was silently dropped.
    """
    import nltk
    from nltk.corpus import wordnet

    def wordnet_pos(tag):
        # Penn tag initial -> WordNet POS; NOUN is the lemmatizer default.
        if tag.startswith('J'):
            return wordnet.ADJ
        elif tag.startswith('V'):
            return wordnet.VERB
        elif tag.startswith('N'):
            return wordnet.NOUN
        elif tag.startswith('R'):
            return wordnet.ADV
        else:
            return wordnet.NOUN

    # The original downloaded 'punkt' twice; once is enough.
    nltk.download('averaged_perceptron_tagger')
    nltk.download('stopwords')
    nltk.download('wordnet')
    nltk.download('punkt')

    stop_words = set(nltk.corpus.stopwords.words('english'))
    stemmer = nltk.stem.WordNetLemmatizer()
    sentences = []
    for trans in items:
        eng, chn = trans.getsource(), trans.gettarget()
        tokens = nltk.word_tokenize(eng)
        pos_tag = [pos[1] for pos in nltk.pos_tag(tokens)]
        roots = [stemmer.lemmatize(word, wordnet_pos(pos_tag[idx]))
                 for idx, word in enumerate(tokens)]
        cleanword = [token for token in roots
                     if token.isalpha() and token not in stop_words and len(token) >= 3]
        # remove duplicates while preserving first-seen order
        clean_word = list(dict.fromkeys(cleanword))
        if len(clean_word) > 0:
            score = Word.search_words(*clean_word).sum('star') / len(clean_word)
        else:
            score = -1
        sentence = Sentence(eng=eng, chn=chn, words=tokens, pos_tag=pos_tag,
                            roots=roots, score=score, typ=typ)
        sentences.append(sentence)
        if len(sentences) > 50:
            Sentence.objects.insert(sentences)
            sentences = []
    # Flush the remaining partial batch (lost in the original).
    if sentences:
        Sentence.objects.insert(sentences)
Example #11
Source File: preprocess_lst_test.py From lexsub with Apache License 2.0 | 5 votes |
def detect_mwe(text_tokens, target_ind, wordnet_pos):
    """Detect a two-token multi-word expression (MWE) starting at
    *target_ind* and, if one is found, fuse the two tokens in place
    with '=' (mutates *text_tokens*; returns None).

    :param text_tokens: list of tokens, modified in place on a match
    :param target_ind: index of the candidate verb token
    :param wordnet_pos: WordNet POS used to lemmatize the target token
    """
    # Guard clause replaces the original's whole-body if-wrapper.
    if target_ind >= len(text_tokens) - 1:
        return
    # One lemmatizer instance for all lookups — the original constructed
    # a fresh WordNetLemmatizer for every single lemmatize call.
    lemmatizer = WordNetLemmatizer()
    verb_lemma = lemmatizer.lemmatize(text_tokens[target_ind], wordnet_pos)
    complement_lemma = lemmatizer.lemmatize(text_tokens[target_ind + 1])
    mwe = '_'.join([verb_lemma, complement_lemma])
    synsets = wordnet.synsets(mwe, wordnet.VERB)
    if len(synsets) > 0:
        # If an MWE also starts one token to the right, prefer it and
        # leave the current pair untouched.
        if target_ind + 1 < len(text_tokens) - 1:
            mwe_right = '_'.join([lemmatizer.lemmatize(text_tokens[target_ind + 1]),
                                  lemmatizer.lemmatize(text_tokens[target_ind + 2])])
            if len(wordnet.synsets(mwe_right)) > 0:
                return
        if is_atomic_mwe(mwe, verb_lemma, complement_lemma, synsets):
            fused = '='.join([text_tokens[target_ind], text_tokens[target_ind + 1]])
            text_tokens[target_ind] = fused
            del text_tokens[target_ind + 1]
Example #12
Source File: wordnet_app.py From luscan-devel with GNU General Public License v2.0 | 5 votes |
def _pos_tuples():
    """(WordNet constant, single-letter code, display name) per POS."""
    return [
        (wn.NOUN, 'N', 'noun'),
        (wn.VERB, 'V', 'verb'),
        (wn.ADJ, 'J', 'adj'),
        (wn.ADV, 'R', 'adv'),
    ]
Example #13
Source File: synset_analysis.py From Quadflor with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self):
    """Set up the NLTK normalizer, lemmatizer, tagger, and the
    Penn-initial -> WordNet POS translation table."""
    NltkNormalizer.install_nltk_corpora('averaged_perceptron_tagger')
    self.normalizer = NltkNormalizer()
    self.lem = nltk.WordNetLemmatizer()
    self.tagger = nltk.PerceptronTagger()
    # Penn Treebank tag initial -> WordNet POS constant.
    self.translation_dict = {
        'J': wn.ADJ,
        'N': wn.NOUN,
        'R': wn.ADV,
        'V': wn.VERB,
    }
Example #14
Source File: preprocessing.py From TextRank with MIT License | 5 votes |
def __get_wordnet_pos(treebank_tag):
    """Maps the treebank tags to WordNet part of speech names."""
    mapping = (('J', wordnet.ADJ), ('V', wordnet.VERB),
               ('N', wordnet.NOUN), ('R', wordnet.ADV))
    for initial, wn_pos in mapping:
        if treebank_tag.startswith(initial):
            return wn_pos
    return None
Example #15
Source File: sentiwordnet.py From yenlp with GNU General Public License v3.0 | 5 votes |
def wordnet_pos_code(tag):
    """Translation from nltk (Penn) tags to a WordNet code; returns ''
    for tags with no WordNet counterpart."""
    prefix_map = (('NN', wordnet.NOUN), ('VB', wordnet.VERB),
                  ('JJ', wordnet.ADJ), ('RB', wordnet.ADV))
    for prefix, code in prefix_map:
        if tag.startswith(prefix):
            return code
    return ''
Example #16
Source File: wordnet_app.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International | 5 votes |
def _pos_tuples():
    """Return (WordNet POS constant, letter code, label) tuples."""
    triples = zip((wn.NOUN, wn.VERB, wn.ADJ, wn.ADV),
                  ('N', 'V', 'J', 'R'),
                  ('noun', 'verb', 'adj', 'adv'))
    return list(triples)
Example #17
Source File: dcs.py From broca with MIT License | 5 votes |
def _disambiguate_doc(self, tagged_tokens):
    """
    Takes a list of tagged tokens, representing a document, in the form:

        [(token, tag), ...]

    And returns a mapping of terms to their disambiguated concepts
    (synsets).
    """
    # Group tokens by PoS; tags outside the four WordNet classes are
    # dropped.
    pos_groups = {pos: [] for pos in [wn.NOUN, wn.VERB, wn.ADJ, wn.ADV]}
    for tok, tag in tagged_tokens:
        if tag in pos_groups:
            pos_groups[tag].append(tok)

    # Final term -> concept mapping. Renamed from `map`, which shadowed
    # the builtin; commented-out debug prints removed.
    term_concepts = {}
    for tag, toks in pos_groups.items():
        term_concepts.update(self._disambiguate_pos(toks, tag))
    return term_concepts
Example #18
Source File: word_sentence_utils.py From resilient-community-apps with MIT License | 5 votes |
def _convert(nlk_tag):
    """
    Convert nlk tag to wordnet flag
    :param nlk_tag:
    :return:
    """
    # Any tag not starting with J/V/R (including nouns) maps to NOUN.
    table = {'J': wordnet.ADJ, 'V': wordnet.VERB, 'R': wordnet.ADV}
    return table.get(nlk_tag[:1], wordnet.NOUN)
Example #19
Source File: intent_classification.py From voice-enabled-chatbot with MIT License | 5 votes |
def get_wordnet_pos(self, word):
    """Map POS tag to first character lemmatize() accepts"""
    initial = nltk.pos_tag([word])[0][1][0].upper()
    wn_by_initial = {
        "J": wordnet.ADJ,
        "N": wordnet.NOUN,
        "V": wordnet.VERB,
        "R": wordnet.ADV,
    }
    return wn_by_initial.get(initial, wordnet.NOUN)
Example #20
Source File: Auto_NLP.py From Auto_ViML with Apache License 2.0 | 5 votes |
def get_word_net_pos(treebank_tag):
    """Penn Treebank tag -> WordNet POS constant; None when unmapped."""
    first = treebank_tag[:1]
    if first == 'J':
        return wordnet.ADJ
    if first == 'V':
        return wordnet.VERB
    if first == 'N':
        return wordnet.NOUN
    if first == 'R':
        return wordnet.ADV
    return None
Example #21
Source File: wordnet_app.py From razzy-spinner with GNU General Public License v3.0 | 5 votes |
def _pos_tuples():
    """WordNet POS constants paired with their letter codes and names."""
    pos_info = [(wn.NOUN, 'N', 'noun'), (wn.VERB, 'V', 'verb'),
                (wn.ADJ, 'J', 'adj'), (wn.ADV, 'R', 'adv')]
    return pos_info
Example #22
Source File: main.py From tensorflow-XNN with MIT License | 5 votes |
def get_wordnet_pos(treebank_tag):
    """Return the WordNet POS for a Penn Treebank tag (None if unmapped)."""
    for letter, wn_pos in (('J', wordnet.ADJ), ('V', wordnet.VERB),
                           ('N', wordnet.NOUN), ('R', wordnet.ADV)):
        if treebank_tag.startswith(letter):
            return wn_pos
    return None
Example #23
Source File: wordnet_similarity.py From jingwei with MIT License | 5 votes |
def wup_similarity(tagx, tagy):
    """Best-effort Wu-Palmer similarity between the first synsets of
    *tagx* and *tagy* across the WordNet POS classes.

    Fixed: `except Exception, e:` is Python 2 syntax and a SyntaxError
    on Python 3; the bound exception was unused anyway.

    NOTE(review): the visible body collects `scores` but never returns
    them — presumably a final `return max(scores)` (or similar) was lost;
    confirm against the original project before relying on the result.
    """
    scores = []
    for pos in [wn.NOUN, wn.VERB, wn.ADJ, wn.ADJ_SAT, wn.ADV]:
        try:
            synsetx = wn.synset('%s.%s.01' % (tagx, pos))
            synsety = wn.synset('%s.%s.01' % (tagy, pos))
            score = synsetx.wup_similarity(synsety)
            if score is None:
                score = 0
        except Exception:
            # Missing synset for this POS (or any lookup failure) counts
            # as zero similarity, matching the original's behavior.
            score = 0
        scores.append(score)
Example #24
Source File: test_preprocess_func.py From tmtoolkit with Apache License 2.0 | 5 votes |
def test_pos_tag_convert_penn_to_wn():
    """Spot-check the Penn -> WordNet tag conversion, including the
    None fallback for empty/unknown/None tags."""
    expected = {'JJ': wn.ADJ, 'RB': wn.ADV, 'NN': wn.NOUN, 'VB': wn.VERB}
    for penn_tag, wn_tag in expected.items():
        assert pos_tag_convert_penn_to_wn(penn_tag) == wn_tag
    for tag in ('', 'invalid', None):
        assert pos_tag_convert_penn_to_wn(tag) is None
Example #25
Source File: disintegrator.py From quantified-self with MIT License | 5 votes |
def __get_wordnet_pos(self, treebank_tag):
    """Penn tag -> WordNet POS constant; '' when there is no match."""
    table = {"J": wordnet.ADJ, "V": wordnet.VERB,
             "N": wordnet.NOUN, "R": wordnet.ADV}
    return table.get(treebank_tag[:1], "")
Example #26
Source File: learn.py From partisan-discourse with Apache License 2.0 | 5 votes |
def tagwn(self, tag):
    """
    Returns the WordNet tag from the Penn Treebank tag.
    """
    # EAFP: unknown initials fall back to NOUN.
    try:
        return {'N': wn.NOUN, 'V': wn.VERB, 'R': wn.ADV, 'J': wn.ADJ}[tag[0]]
    except KeyError:
        return wn.NOUN
Example #27
Source File: sick_extender.py From Sentence-similarity-classifier-for-pyTorch with MIT License | 5 votes |
def get_wordnet_pos(treebank_tag):
    """Converts a Penn Tree-Bank part of speech tag into a corresponding
    WordNet-friendly tag; unknown tags map to the string 'OTHER'.

    Borrowed from:
    http://stackoverflow.com/questions/15586721/wordnet-lemmatization-and-pos-tagging-in-python.
    """
    # startswith accepts a tuple, replacing the original's `or` chain.
    if treebank_tag.startswith(('J', 'A')):
        return wordnet.ADJ
    if treebank_tag.startswith('V'):
        return wordnet.VERB
    if treebank_tag.startswith('N'):
        return wordnet.NOUN
    if treebank_tag.startswith('R'):
        return wordnet.ADV
    return 'OTHER'
Example #28
Source File: featurizer.py From combine-FEVER-NSMN with MIT License | 5 votes |
def convert_to_wn_pos(pos):
    """Penn tag prefix -> WordNet POS; '' when no mapping applies."""
    table = {"J": wn.ADJ, "V": wn.VERB, "N": wn.NOUN, "R": wn.ADV}
    return table.get(pos[:1], "")
Example #29
Source File: build.py From atap with Apache License 2.0 | 5 votes |
def lemmatize(self, token, pos_tag):
    """Lemmatize *token*, choosing the WordNet POS from the Penn tag's
    first character (NOUN if unrecognized)."""
    initial = pos_tag[0]
    if initial == 'V':
        tag = wn.VERB
    elif initial == 'R':
        tag = wn.ADV
    elif initial == 'J':
        tag = wn.ADJ
    else:
        # Covers 'N' and every unrecognized initial alike.
        tag = wn.NOUN
    return self.lemmatizer.lemmatize(token, tag)
Example #30
Source File: normalize.py From atap with Apache License 2.0 | 5 votes |
def lemmatize(self, token, pos_tag):
    """Lemmatize *token* with the WordNet POS implied by its Penn tag
    (falls back to NOUN)."""
    wn_tag = {'J': wn.ADJ, 'N': wn.NOUN, 'R': wn.ADV, 'V': wn.VERB}.get(pos_tag[0], wn.NOUN)
    return self.lemmatizer.lemmatize(token, wn_tag)