Python nltk.tag.pos_tag() Examples

The following are 10 code examples of nltk.tag.pos_tag(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module nltk.tag, or try the search function.
Example #1
Source File: sklearnLDA.py    From curriculum with GNU General Public License v3.0 6 votes vote down vote up
def load_corpus(expected_tags, lemmatizer, swlist, corpus):
    """Append one lemmatized, filtered string to ``corpus`` per non-blank line.

    Reads ``../../corpus/news.txt``, tokenizes and POS-tags every non-blank
    line, keeps only the purely alphabetic (a-z, A-Z) words whose tag is in
    ``expected_tags`` and which are not in ``swlist``, lemmatizes them and
    appends the space-joined result to ``corpus``.

    Args:
        expected_tags: Container of POS tags to keep.
        lemmatizer: Object exposing ``lemmatize(word, pos)``.
        swlist: Container of words to drop.
        corpus: List that is extended in place; nothing is returned.
    """
    with open("../../corpus/news.txt", "r") as cf:
        for raw_line in cf.readlines():
            text = raw_line.strip()
            if not text:
                continue
            words = word_tokenize(text)
            tags = pos_tag(words)
            seglist = []
            for word, tagged in zip(words, tags):
                tag = tagged[1]
                if tag not in expected_tags or word in swlist:
                    continue
                # Skip words containing anything other than ASCII letters.
                letters_only = re.sub("[^a-zA-Z]", "", word)
                if len(letters_only) != len(word):
                    continue
                taghead = tag[0].lower()
                # WordNet POS codes: ADJ:a, ADJ_SAT:s, ADV:r, NOUN:n, VERB:v.
                # A tag starting with 'j' is mapped to wordnet.ADJ; any other
                # tag initial is handed to the lemmatizer unchanged.
                pos = wordnet.ADJ if taghead == 'j' else taghead
                seglist.append(lemmatizer.lemmatize(word, pos))
            corpus.append(" ".join(seglist))


# Save the model training results
Example #2
Source File: integrated.py    From curriculum with GNU General Public License v3.0 6 votes vote down vote up
def load_corpus(expected_tags, lemmatizer, swlist, corpus):
    """Fill ``corpus`` with lemmatized text built from the news corpus file.

    Every non-blank line of ``../../corpus/news.txt`` is tokenized and
    POS-tagged. A word is kept when its tag is in ``expected_tags``, it is
    not in ``swlist`` and it consists solely of the letters a-z / A-Z. The
    kept words are lemmatized, joined with single spaces and appended to
    ``corpus`` (one entry per non-blank line).

    Args:
        expected_tags: Container of POS tags that are retained.
        lemmatizer: Object exposing ``lemmatize(word, pos)``.
        swlist: Container of words to exclude.
        corpus: List mutated in place; the function returns ``None``.
    """
    with open("../../corpus/news.txt", "r") as cf:
        for line in cf.readlines():
            stripped = line.strip()
            if len(stripped) == 0:
                continue
            tokens = word_tokenize(stripped)
            tagged_tokens = pos_tag(tokens)
            lemmas = []
            for token, pair in zip(tokens, tagged_tokens):
                pos_label = pair[1]
                wanted = pos_label in expected_tags and token not in swlist
                # Removing non-letters leaves the token unchanged only when
                # it was purely alphabetic to begin with.
                if wanted and re.sub("[^a-zA-Z]", "", token) == token:
                    initial = pos_label[0].lower()
                    # WordNet POS codes: ADJ:a, ADJ_SAT:s, ADV:r, NOUN:n, VERB:v.
                    if initial == 'j':
                        wn_pos = wordnet.ADJ
                    else:
                        wn_pos = initial
                    lemmas.append(lemmatizer.lemmatize(token, wn_pos))
            corpus.append(" ".join(lemmas))

# Save the model training results
Example #3
Source File: commands.py    From cybot with GNU Affero General Public License v3.0 6 votes vote down vote up
def random_rate(args):
    """Return a random ``"<rating>/10"`` or ``"<noun>/10"`` string.

    ``args["args"]`` is passed to ``pos_tag`` and the tagged result is
    printed. Words tagged ``NNP`` or ``NN`` are collected; when none are
    found the placeholder ``"nothings"`` is used instead. A coin flip then
    decides whether a numeric rating (0-10) or a randomly picked collected
    word is placed in front of ``"/10"``; the numeric rating is always used
    when the first collected entry is the placeholder.

    Args:
        args: Mapping with an ``"args"`` entry to be POS-tagged.

    Returns:
        The formatted string.
    """
    to_tag = args["args"]
    give_rating = random.randint(0, 1)
    tagged = pos_tag(to_tag)
    print(tagged)
    nounlist = [token for token, tag in tagged if tag in ("NNP", "NN")]
    if not nounlist:
        nounlist = ["nothings"]
    # The random draws stay in the original order: noun index, then rating.
    picked = nounlist[random.randint(0, len(nounlist) - 1)]
    rating = random.randint(0, 10)
    if not give_rating and nounlist[0] != "nothings":
        return picked + "/10"
    return str(rating) + "/10"
Example #4
Source File: test_tag.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International 6 votes vote down vote up
def test_basic():
    """Check ``pos_tag`` output for one fixed, tokenized English sentence."""
    from nltk.tag import pos_tag
    from nltk.tokenize import word_tokenize

    sentence = "John's big idea isn't all that bad."
    expected = [
        ('John', 'NNP'), ("'s", 'POS'), ('big', 'JJ'), ('idea', 'NN'),
        ('is', 'VBZ'), ("n't", 'RB'), ('all', 'PDT'), ('that', 'DT'),
        ('bad', 'JJ'), ('.', '.'),
    ]

    tokens = word_tokenize(sentence)
    result = pos_tag(tokens)
    assert result == expected
Example #5
Source File: named_entity.py    From razzy-spinner with GNU General Public License v3.0 5 votes vote down vote up
def postag_tree(tree):
    """Return a copy of ``tree`` whose leaves are ``(leaf, pos_tag)`` pairs.

    The leaves of ``tree`` are tagged in one ``pos_tag`` call; the tags are
    then consumed in order while walking the tree. Direct ``Tree`` children
    are rebuilt with the same label, and everything hangs under a new root
    labelled ``'S'``.
    """
    # Part-of-speech tagging.
    tagged_leaves = pos_tag(tree.leaves())
    tag_iter = (tag for (_, tag) in tagged_leaves)

    result = Tree('S', [])
    for node in tree:
        if not isinstance(node, Tree):
            result.append((node, next(tag_iter)))
            continue
        subtree = Tree(node.label(), [])
        result.append(subtree)
        for leaf in node:
            subtree.append((leaf, next(tag_iter)))
    return result
Example #6
Source File: test_tag.py    From razzy-spinner with GNU General Public License v3.0 5 votes vote down vote up
def test_basic():
    """Verify the tags ``pos_tag`` assigns to a fixed, tokenized sentence."""
    from nltk.tag import pos_tag
    from nltk.tokenize import word_tokenize

    text = "John's big idea isn't all that bad."
    expected_tags = [
        ('John', 'NNP'),
        ("'s", 'POS'),
        ('big', 'JJ'),
        ('idea', 'NN'),
        ('is', 'VBZ'),
        ("n't", 'RB'),
        ('all', 'DT'),
        ('that', 'DT'),
        ('bad', 'JJ'),
        ('.', '.'),
    ]

    result = pos_tag(word_tokenize(text))
    assert result == expected_tags
Example #7
Source File: generate.py    From justcopy-backend with MIT License 5 votes vote down vote up
def nltk_entity_groups(text):
    """Return all contiguous NER tagged chunks by NLTK.

    ``text`` is tokenized, POS-tagged and chunked with ``ne_chunk``. Each
    subtree whose label is not ``'S'`` contributes one string: the first
    element of every leaf, joined by single spaces.
    """
    tagged_tokens = pos_tag(word_tokenize(text))
    chunk_tree = ne_chunk(tagged_tokens)

    groups = []
    for subtree in chunk_tree.subtrees():
        if subtree.label() == 'S':
            continue
        leaf_words = [leaf[0] for leaf in subtree.leaves()]
        groups.append(' '.join(leaf_words))
    return groups


# ------------------------------------------------------------------------------
# Find answer candidates.
# ------------------------------------------------------------------------------ 
Example #8
Source File: named_entity.py    From luscan-devel with GNU General Public License v2.0 5 votes vote down vote up
def postag_tree(tree):
    """Return a copy of ``tree`` whose leaves are ``(leaf, pos_tag)`` pairs.

    The leaves of ``tree`` are tagged in one ``pos_tag`` call and the tags
    are consumed in order while walking the tree. Direct ``Tree`` children
    are rebuilt with the same label under a new root labelled ``'S'``.

    Args:
        tree: Tree whose children are either leaves or ``Tree`` instances
            containing leaves.

    Returns:
        A new ``Tree`` with every leaf replaced by a ``(leaf, tag)`` tuple.
    """
    # Part-of-speech tagging.
    words = tree.leaves()
    tag_iter = (pos for (word, pos) in pos_tag(words))
    newtree = Tree('S', [])
    for child in tree:
        if isinstance(child, Tree):
            # Prefer the label() accessor; fall back to the legacy ``node``
            # attribute for Tree implementations that predate label().
            label = child.label() if hasattr(child, 'label') else child.node
            newtree.append(Tree(label, []))
            for subchild in child:
                # The built-in next() works on Python 2.6+ and Python 3,
                # unlike the Python-2-only ``iterator.next()`` method.
                newtree[-1].append((subchild, next(tag_iter)))
        else:
            newtree.append((child, next(tag_iter)))
    return newtree
Example #9
Source File: rhodiola.py    From rhodiola with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def find_proper_nouns(string):
    """Return the 15 most frequent words tagged ``'NNP'`` in ``string``.

    ``string`` is split on whitespace and POS-tagged. Each word tagged
    ``'NNP'`` is lower-cased and stripped of non-word characters, then
    discarded if it is shorter than three characters, an English stop word,
    or listed in ``extra_stopwords``.

    Args:
        string: Text to analyse.

    Returns:
        A list of at most 15 ``(word, count)`` tuples sorted by descending
        count; words with equal counts keep first-seen order.
    """
    # Build the stop-word lookup once instead of re-reading the word list
    # for every candidate; a set also gives O(1) membership tests.
    english_stopwords = set(stopwords.words('english'))

    counts = Counter()
    for word, pos in pos_tag(string.split()):
        if pos != 'NNP':
            continue
        cleaned = re.sub(r'\W+', '', word.lower())
        # The length check also rejects the empty string.
        if len(cleaned) < 3:
            continue
        if cleaned in english_stopwords or cleaned in extra_stopwords:
            continue
        counts[cleaned] += 1
    return counts.most_common(15)
Example #10
Source File: named_entity.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International 5 votes vote down vote up
def postag_tree(tree):
    """Build a new ``'S'``-rooted tree pairing every leaf with its POS tag.

    All leaves of ``tree`` are tagged with a single ``pos_tag`` call. The
    tree is then walked in order: a ``Tree`` child is recreated with the
    same label and ``(leaf, tag)`` children, while any other child becomes
    a ``(child, tag)`` tuple directly under the new root.
    """
    # Part-of-speech tagging.
    leaves = tree.leaves()
    tags = (tag for (_, tag) in pos_tag(leaves))

    tagged_tree = Tree('S', [])
    for item in tree:
        if isinstance(item, Tree):
            # Pull one tag per sub-leaf, in order, while rebuilding the subtree.
            tagged_children = [(leaf, next(tags)) for leaf in item]
            tagged_tree.append(Tree(item.label(), tagged_children))
        else:
            tagged_tree.append((item, next(tags)))
    return tagged_tree