Python nltk.tag.pos_tag() Examples
The following are 10 code examples of nltk.tag.pos_tag().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module nltk.tag, or try the search function.
Example #1
Source File: sklearnLDA.py From curriculum with GNU General Public License v3.0 | 6 votes |
def load_corpus(expected_tags, lemmatizer, swlist, corpus):
    """Append one filtered, lemmatized sentence per non-blank news line to ``corpus``.

    Reads ``../../corpus/news.txt`` line by line. Each non-blank line is
    tokenized and POS-tagged; a token is kept only if its tag is in
    ``expected_tags``, it is not in ``swlist``, and it consists solely of
    ASCII letters. Kept tokens are lemmatized with ``lemmatizer`` and the
    space-joined result is appended to ``corpus`` (mutated in place).
    """
    with open("../../corpus/news.txt", "r") as cf:
        for raw_line in cf.readlines():
            text = raw_line.strip()
            if len(text) == 0:
                continue
            words = word_tokenize(text)
            tags = pos_tag(words)
            seglist = []
            for idx, word in enumerate(words):
                tag = tags[idx][1]
                if tag not in expected_tags or word in swlist:
                    continue
                # Stripping every non-letter must leave the token unchanged,
                # i.e. the token contains ASCII letters only.
                if len(re.sub("[^a-zA-Z]", "", word)) != len(word):
                    continue
                taghead = tag[0].lower()
                # WordNet POS codes: ADJ 'a', ADJ_SAT 's', ADV 'r', NOUN 'n', VERB 'v'.
                # Tags starting with 'J' are mapped to wordnet.ADJ; any other
                # tag passes its lowercased first letter through unchanged.
                wn_pos = wordnet.ADJ if taghead == 'j' else taghead
                seglist.append(lemmatizer.lemmatize(word, wn_pos))
            corpus.append(" ".join(seglist))


# Save the model training results.
Example #2
Source File: integrated.py From curriculum with GNU General Public License v3.0 | 6 votes |
def load_corpus(expected_tags, lemmatizer, swlist, corpus):
    """Fill ``corpus`` with lemmatized token strings built from the news file.

    For every non-blank line of ``../../corpus/news.txt`` the line is
    tokenized and POS-tagged, tokens are filtered (tag in ``expected_tags``,
    token not in ``swlist``, ASCII letters only), the survivors are
    lemmatized, and the space-joined result is appended to ``corpus``.
    """

    def _wanted(token, tag):
        # Same checks, same order: tag filter, stop-word filter, letters-only filter.
        if tag not in expected_tags or token in swlist:
            return False
        return len(re.sub("[^a-zA-Z]", "", token)) == len(token)

    def _lemma(token, tag):
        taghead = tag[0].lower()
        # WordNet POS codes: ADJ 'a', ADJ_SAT 's', ADV 'r', NOUN 'n', VERB 'v'.
        # A 'j' tag head is translated to wordnet.ADJ; other heads are used as-is.
        return lemmatizer.lemmatize(token, wordnet.ADJ if taghead == 'j' else taghead)

    with open("../../corpus/news.txt", "r") as cf:
        for line in cf.readlines():
            stripped = line.strip()
            if stripped:
                words = word_tokenize(stripped)
                tags = pos_tag(words)
                seglist = [
                    _lemma(words[i], tags[i][1])
                    for i in range(len(words))
                    if _wanted(words[i], tags[i][1])
                ]
                corpus.append(" ".join(seglist))


# Save the model training results.
Example #3
Source File: commands.py From cybot with GNU Affero General Public License v3.0 | 6 votes |
def random_rate(args):
    """Return either a random ``"<0-10>/10"`` rating or ``"<noun>/10"``.

    ``args["args"]`` is POS-tagged and printed. Words tagged ``NNP`` or
    ``NN`` are collected; if there are none, the placeholder ``"nothings"``
    is used. A coin flip (or the placeholder being first in the list)
    selects the numeric rating; otherwise a randomly chosen noun is
    returned with ``"/10"`` appended.
    """
    message = args["args"]
    # Drawn before tagging so the sequence of random calls is unchanged.
    give_rating = random.randint(0, 1)
    tagged = pos_tag(message)
    print(tagged)

    nounlist = [word for word, tag in tagged if tag in ("NNP", "NN")]
    if not nounlist:
        nounlist.append("nothings")

    chosen = nounlist[random.randint(0, len(nounlist) - 1)]
    rating = random.randint(0, 10)

    if not give_rating and nounlist[0] != "nothings":
        return chosen + "/10"
    return str(rating) + "/10"
Example #4
Source File: test_tag.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International | 6 votes |
def test_basic():
    """Check that pos_tag yields the expected tags for a fixed sentence."""
    from nltk.tag import pos_tag
    from nltk.tokenize import word_tokenize

    sentence = "John's big idea isn't all that bad."
    expected = [
        ('John', 'NNP'),
        ("'s", 'POS'),
        ('big', 'JJ'),
        ('idea', 'NN'),
        ('is', 'VBZ'),
        ("n't", 'RB'),
        ('all', 'PDT'),
        ('that', 'DT'),
        ('bad', 'JJ'),
        ('.', '.'),
    ]

    tokens = word_tokenize(sentence)
    result = pos_tag(tokens)
    assert result == expected
Example #5
Source File: named_entity.py From razzy-spinner with GNU General Public License v3.0 | 5 votes |
def postag_tree(tree):
    """Return a new ``'S'`` tree in which every leaf is a ``(leaf, pos_tag)`` pair.

    All leaves of ``tree`` are tagged in a single ``pos_tag`` call. The
    top-level structure is then rebuilt: each child that is a ``Tree``
    becomes a subtree with the same label whose children are paired with
    their tags, and each other child is paired with its tag directly.
    """
    # Tags are consumed one by one, in leaf order, while rebuilding the tree.
    tags = (tag for (_, tag) in pos_tag(tree.leaves()))

    tagged_tree = Tree('S', [])
    for child in tree:
        if not isinstance(child, Tree):
            tagged_tree.append((child, next(tags)))
            continue
        subtree = Tree(child.label(), [])
        tagged_tree.append(subtree)
        # NOTE(review): each subchild is treated as a leaf here; presumably
        # the input is at most two levels deep — confirm against callers.
        for leaf in child:
            subtree.append((leaf, next(tags)))
    return tagged_tree
Example #6
Source File: test_tag.py From razzy-spinner with GNU General Public License v3.0 | 5 votes |
def test_basic():
    """Verify the (token, tag) pairs pos_tag produces for one known sentence."""
    from nltk.tag import pos_tag
    from nltk.tokenize import word_tokenize

    # Expected output, written as parallel token / tag columns.
    tokens = ['John', "'s", 'big', 'idea', 'is', "n't", 'all', 'that', 'bad', '.']
    tags = ['NNP', 'POS', 'JJ', 'NN', 'VBZ', 'RB', 'DT', 'DT', 'JJ', '.']

    result = pos_tag(word_tokenize("John's big idea isn't all that bad."))
    assert result == list(zip(tokens, tags))
Example #7
Source File: generate.py From justcopy-backend with MIT License | 5 votes |
def nltk_entity_groups(text):
    """Return the chunks produced by NLTK's ``ne_chunk`` as space-joined strings.

    ``text`` is tokenized, POS-tagged and chunked; every subtree whose label
    is not ``'S'`` contributes one string made of the first element of each
    of its leaves, joined by single spaces.
    """
    tagged_tokens = pos_tag(word_tokenize(text))
    parse_tree = ne_chunk(tagged_tokens)

    ner_chunks = []
    for subtree in parse_tree.subtrees():
        if subtree.label() == 'S':
            continue
        ner_chunks.append(' '.join(leaf[0] for leaf in subtree.leaves()))
    return ner_chunks


# ------------------------------------------------------------------------------
# Find answer candidates.
# ------------------------------------------------------------------------------
Example #8
Source File: named_entity.py From luscan-devel with GNU General Public License v2.0 | 5 votes |
def postag_tree(tree):
    """Return a new ``'S'`` tree whose leaves are ``(leaf, pos_tag)`` pairs.

    The leaves of ``tree`` are tagged in one ``pos_tag`` call and the
    top-level structure is rebuilt: a child that is a ``Tree`` becomes a
    subtree with the same node label whose children are paired with their
    tags; any other child is paired with its tag directly.

    :param tree: the tree whose leaves should be tagged.
    :return: a new ``Tree`` rooted at ``'S'``.
    """
    # Part-of-speech tagging.
    words = tree.leaves()
    tag_iter = (pos for (word, pos) in pos_tag(words))
    newtree = Tree('S', [])
    for child in tree:
        if isinstance(child, Tree):
            # NOTE(review): ``.node`` is kept as in the original; newer NLTK
            # code spells this ``.label()`` — confirm against the NLTK
            # version this project bundles before changing it.
            newtree.append(Tree(child.node, []))
            for subchild in child:
                # The built-in next() works on Python 2.6+ and Python 3; the
                # generator method ``.next()`` exists only on Python 2.
                newtree[-1].append((subchild, next(tag_iter)))
        else:
            newtree.append((child, next(tag_iter)))
    return newtree
Example #9
Source File: rhodiola.py From rhodiola with BSD 3-Clause "New" or "Revised" License | 5 votes |
def find_proper_nouns(string):
    """Return the 15 most frequent proper nouns in ``string`` with their counts.

    The text is split on whitespace and POS-tagged. Words tagged ``NNP`` are
    lowercased and stripped of non-word characters, then dropped if they are
    shorter than three characters, are English stop words, or appear in
    ``extra_stopwords``.

    :param string: the text to scan.
    :return: a list of at most 15 ``(word, count)`` tuples, highest count
        first.
    """
    tagged_sent = pos_tag(string.split())
    propernouns = [
        re.sub(r'\W+', '', word.lower())
        for word, pos in tagged_sent
        if pos == 'NNP'
    ]
    if not propernouns:
        # Nothing to count; also avoids loading the stop-word list needlessly.
        return []

    # Load the stop-word list once instead of once per candidate word, and
    # use a set so each membership test is O(1).
    english_stopwords = set(stopwords.words('english'))

    # ``len(word) >= 3`` also rejects the empty string left when a token
    # consisted only of non-word characters.
    last_propernouns = [
        word
        for word in propernouns
        if len(word) >= 3
        and word not in english_stopwords
        and word not in extra_stopwords
    ]

    # most_common(15) is the count-descending sort plus slice, with ties kept
    # in first-encountered order.
    return Counter(last_propernouns).most_common(15)
Example #10
Source File: named_entity.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International | 5 votes |
def postag_tree(tree):
    """Rebuild ``tree`` under an ``'S'`` root with each leaf paired with its POS tag.

    ``pos_tag`` is called once on all leaves. Children that are ``Tree``
    instances are copied as same-labelled subtrees whose children become
    ``(subchild, tag)`` tuples; all other children become ``(child, tag)``
    tuples directly under the new root.
    """
    # Part-of-speech tagging: one tag per leaf, handed out in leaf order.
    tag_stream = (pos for (_word, pos) in pos_tag(tree.leaves()))

    def _with_tag(leaf):
        return (leaf, next(tag_stream))

    result = Tree('S', [])
    for node in tree:
        if isinstance(node, Tree):
            branch = Tree(node.label(), [])
            result.append(branch)
            for subnode in node:
                branch.append(_with_tag(subnode))
        else:
            result.append(_with_tag(node))
    return result