Python nltk.corpus.wordnet.ADJ Examples

The following are 30 code examples of nltk.corpus.wordnet.ADJ(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module nltk.corpus.wordnet, or try the search function.
Example #1
Source File: normalization.py    From text-analytics-with-python with Apache License 2.0 6 votes vote down vote up
def pos_tag_text(text):
    """POS-tag *text*, returning (lowercased word, WordNet POS) pairs."""

    def penn_to_wn_tags(pos_tag):
        # The first letter of a Penn tag determines the WordNet POS;
        # anything unrecognized maps to None.
        return {'J': wn.ADJ, 'V': wn.VERB,
                'N': wn.NOUN, 'R': wn.ADV}.get(pos_tag[:1])

    return [(word.lower(), penn_to_wn_tags(pos_tag))
            for word, pos_tag in tag(text)]
    
# lemmatize text based on POS tags 
Example #2
Source File: test_preprocess_func.py    From tmtoolkit with Apache License 2.0 6 votes vote down vote up
def test_simplified_pos():
    """Table-driven checks for simplified_pos() in default and 'penn' tagsets."""
    default_cases = [
        ('', ''), ('N', 'N'), ('V', 'V'), ('ADJ', 'ADJ'), ('ADV', 'ADV'),
        ('AD', ''), ('ADX', ''), ('PRP', ''), ('XYZ', ''),
        ('NN', 'N'), ('NNP', 'N'), ('VX', 'V'), ('ADJY', 'ADJ'), ('ADVZ', 'ADV'),
    ]
    for pos_in, expected in default_cases:
        assert simplified_pos(pos_in) == expected

    penn_cases = [
        ('NNP', 'N'), ('VFOO', 'V'), ('JJ', 'ADJ'), ('JJX', 'ADJ'),
        ('RB', 'ADV'), ('RBFOO', 'ADV'), ('FOOBAR', ''),
    ]
    for pos_in, expected in penn_cases:
        assert simplified_pos(pos_in, tagset='penn') == expected
Example #3
Source File: normalization.py    From text-analytics-with-python with Apache License 2.0 6 votes vote down vote up
def pos_tag_text(text):
    """POS-tag *text*, returning (lowercased word, WordNet POS) pairs."""

    def penn_to_wn_tags(pos_tag):
        # First letter of a Penn tag selects the WordNet POS; unknown -> None.
        return {'J': wn.ADJ, 'V': wn.VERB,
                'N': wn.NOUN, 'R': wn.ADV}.get(pos_tag[:1])

    return [(word.lower(), penn_to_wn_tags(pos_tag))
            for word, pos_tag in tag(text)]
    
# lemmatize text based on POS tags 
Example #4
Source File: normalization.py    From text-analytics-with-python with Apache License 2.0 6 votes vote down vote up
def pos_tag_text(text):
    """Tag *text* and pair each lowercased token with its WordNet POS (or None)."""

    def penn_to_wn_tags(pos_tag):
        # Penn tags starting with J/V/N/R map to ADJ/VERB/NOUN/ADV; else None.
        return {'J': wn.ADJ, 'V': wn.VERB,
                'N': wn.NOUN, 'R': wn.ADV}.get(pos_tag[:1])

    return [(word.lower(), penn_to_wn_tags(pos_tag))
            for word, pos_tag in tag(text)]
    
# lemmatize text based on POS tags 
Example #5
Source File: _common.py    From tmtoolkit with Apache License 2.0 6 votes vote down vote up
def pos_tag_convert_penn_to_wn(tag):
    """
    Convert POS tag from Penn tagset to WordNet tagset.

    :param tag: a tag from Penn tagset
    :return: a tag from WordNet tagset or None if no corresponding tag could be found
    """
    from nltk.corpus import wordnet as wn

    penn_to_wn = {
        'JJ': wn.ADJ, 'JJR': wn.ADJ, 'JJS': wn.ADJ,
        'RB': wn.ADV, 'RBR': wn.ADV, 'RBS': wn.ADV,
        'NN': wn.NOUN, 'NNS': wn.NOUN, 'NNP': wn.NOUN, 'NNPS': wn.NOUN,
        'VB': wn.VERB, 'VBD': wn.VERB, 'VBG': wn.VERB,
        'VBN': wn.VERB, 'VBP': wn.VERB, 'VBZ': wn.VERB,
    }
    return penn_to_wn.get(tag)
Example #6
Source File: normalization.py    From text-analytics-with-python with Apache License 2.0 6 votes vote down vote up
def pos_tag_text(text):
    """Return (lowercased token, WordNet POS) pairs for *text*."""

    def penn_to_wn_tags(pos_tag):
        # Only the leading letter of a Penn tag matters for WordNet POS.
        return {'J': wn.ADJ, 'V': wn.VERB,
                'N': wn.NOUN, 'R': wn.ADV}.get(pos_tag[:1])

    return [(word.lower(), penn_to_wn_tags(pos_tag))
            for word, pos_tag in tag(text)]
    
# lemmatize text based on POS tags 
Example #7
Source File: wn_persistent_api.py    From combine-FEVER-NSMN with MIT License 6 votes vote down vote up
def convert_to_wn_pos(pos):
    """Map a Penn POS tag to a WordNet POS constant; "" when unrecognized."""
    return {"J": wn.ADJ, "V": wn.VERB,
            "N": wn.NOUN, "R": wn.ADV}.get(pos[:1], "")
Example #8
Source File: deploy.py    From Election-Meddling with MIT License 6 votes vote down vote up
def get_wordnet_pos(self, treebank_tag):
    """Map a Penn Treebank tag to a WordNet POS for lemmatization.

    Unrecognized tags fall back to NOUN.
    """
    lookup = {'J': wordnet.ADJ, 'V': wordnet.VERB,
              'N': wordnet.NOUN, 'R': wordnet.ADV}
    return lookup.get(treebank_tag[:1], wordnet.NOUN)
Example #9
Source File: sentiwordnet.py    From yenlp with GNU General Public License v3.0 5 votes vote down vote up
def wordnet_pos_code(tag):
    '''Translation from nltk tags to Wordnet code'''
    # The two-letter prefix of the NLTK tag selects the WordNet POS;
    # unrecognized prefixes yield the empty string.
    return {'NN': wordnet.NOUN, 'VB': wordnet.VERB,
            'JJ': wordnet.ADJ, 'RB': wordnet.ADV}.get(tag[:2], '')
Example #10
Source File: build.py    From atap with Apache License 2.0 5 votes vote down vote up
def lemmatize(self, token, pos_tag):
    """Lemmatize *token* using the WordNet POS derived from its Penn tag."""
    initial = pos_tag[0]
    if initial == 'N':
        wn_tag = wn.NOUN
    elif initial == 'V':
        wn_tag = wn.VERB
    elif initial == 'R':
        wn_tag = wn.ADV
    elif initial == 'J':
        wn_tag = wn.ADJ
    else:
        # Unknown tags are treated as nouns.
        wn_tag = wn.NOUN
    return self.lemmatizer.lemmatize(token, wn_tag)
Example #11
Source File: transformer.py    From atap with Apache License 2.0 5 votes vote down vote up
def lemmatize(self, token, pos_tag):
    """Lemmatize *token*, choosing the WordNet POS from the Penn tag's first letter."""
    initial = pos_tag[0]
    if initial == 'V':
        wn_pos = wn.VERB
    elif initial == 'R':
        wn_pos = wn.ADV
    elif initial == 'J':
        wn_pos = wn.ADJ
    else:
        # 'N' and any unrecognized tag both fall back to NOUN.
        wn_pos = wn.NOUN
    return self.lemmatizer.lemmatize(token, wn_pos)
Example #12
Source File: __init__.py    From wordai with MIT License 5 votes vote down vote up
def _sentence_to_mongo(typ, items):
    """Tokenize/lemmatize translation pairs and bulk-insert them as Sentence docs.

    :param typ: sentence type tag stored on each Sentence document
    :param items: iterable of translation objects exposing getsource()/gettarget()
    """
    import nltk
    from nltk.corpus import wordnet

    def wordnet_pos(tag):
        # Map a Penn tag's first letter to a WordNet POS; default to NOUN.
        if tag.startswith('J'):
            return wordnet.ADJ
        elif tag.startswith('V'):
            return wordnet.VERB
        elif tag.startswith('N'):
            return wordnet.NOUN
        elif tag.startswith('R'):
            return wordnet.ADV
        else:
            return wordnet.NOUN

    nltk.download('averaged_perceptron_tagger')
    nltk.download('stopwords')
    nltk.download('wordnet')
    nltk.download('punkt')
    stop_words = set(nltk.corpus.stopwords.words('english'))
    stemmer = nltk.stem.WordNetLemmatizer()
    sentences = []
    for trans in items:
        eng, chn = trans.getsource(), trans.gettarget()
        tokens = nltk.word_tokenize(eng)
        pos_tag = [pos[1] for pos in nltk.pos_tag(tokens)]
        roots = [stemmer.lemmatize(word, wordnet_pos(pos_tag[idx])) for idx, word in enumerate(tokens)]
        cleanword = [token for token in roots if token.isalpha() and token not in stop_words and len(token) >= 3]
        # remove duplicates while preserving first-seen order
        clean_word = list(dict.fromkeys(cleanword))
        if len(clean_word) > 0:
            score = Word.search_words(*clean_word).sum('star') / len(clean_word)
        else:
            score = -1
        sentence = Sentence(eng=eng, chn=chn, words=tokens, pos_tag=pos_tag, roots=roots, score=score, typ=typ)
        sentences.append(sentence)
        if len(sentences) > 50:
            Sentence.objects.insert(sentences)
            sentences = []
    # BUG FIX: flush the final partial batch. Previously any trailing
    # <= 50 sentences accumulated after the last full batch were dropped.
    if sentences:
        Sentence.objects.insert(sentences)
Example #13
Source File: wordnet_app.py    From luscan-devel with GNU General Public License v2.0 5 votes vote down vote up
def _pos_tuples():
    # (WordNet POS constant, one-letter code, human-readable label)
    constants = (wn.NOUN, wn.VERB, wn.ADJ, wn.ADV)
    labels = ('noun', 'verb', 'adj', 'adv')
    return list(zip(constants, 'NVJR', labels))
Example #14
Source File: wordnet_app.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International 5 votes vote down vote up
def _pos_tuples():
    # Triples of (WordNet POS constant, single-letter code, label).
    return [
        (pos, code, label)
        for pos, code, label in zip(
            (wn.NOUN, wn.VERB, wn.ADJ, wn.ADV),
            'NVJR',
            ('noun', 'verb', 'adj', 'adv'),
        )
    ]
Example #15
Source File: synset_analysis.py    From Quadflor with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def __init__(self):
    # The tagger model must be installed before constructing PerceptronTagger.
    NltkNormalizer.install_nltk_corpora('averaged_perceptron_tagger')
    self.normalizer = NltkNormalizer()
    self.lem = nltk.WordNetLemmatizer()
    self.tagger = nltk.PerceptronTagger()
    # Penn tag initial -> WordNet POS constant.
    self.translation_dict = dict(zip('JNRV', (wn.ADJ, wn.NOUN, wn.ADV, wn.VERB)))
Example #16
Source File: preprocessing.py    From TextRank with MIT License 5 votes vote down vote up
def __get_wordnet_pos(treebank_tag):
    """Maps the treebank tags to WordNet part of speech names (None if unknown)."""
    return {'J': wordnet.ADJ, 'V': wordnet.VERB,
            'N': wordnet.NOUN, 'R': wordnet.ADV}.get(treebank_tag[:1])
Example #17
Source File: normalization.py    From natural-language-preprocessings with MIT License 5 votes vote down vote up
def lemmatize_term(term, pos=None):
    """Lemmatize *term*; when *pos* is None, infer it from WordNet synsets."""
    if pos is not None:
        return nltk.WordNetLemmatizer().lemmatize(term, pos=pos)

    synsets = wordnet.synsets(term)
    if not synsets:
        # No synsets -> nothing to lemmatize against; return unchanged.
        return term
    pos = synsets[0].pos()
    # The lemmatizer does not accept satellite-adjective POS; normalize to ADJ.
    if pos == wordnet.ADJ_SAT:
        pos = wordnet.ADJ
    return nltk.WordNetLemmatizer().lemmatize(term, pos=pos)
Example #18
Source File: transformers.py    From atap with Apache License 2.0 5 votes vote down vote up
def lemmatize(self, token, pos_tag):
    """Return the WordNet lemma of *token* given its Penn POS tag."""
    initial = pos_tag[0]
    if initial == 'V':
        wordnet_tag = wn.VERB
    elif initial == 'R':
        wordnet_tag = wn.ADV
    elif initial == 'J':
        wordnet_tag = wn.ADJ
    else:
        # Nouns and any unknown tags both lemmatize as NOUN.
        wordnet_tag = wn.NOUN
    return self.lemmatizer.lemmatize(token, wordnet_tag)
Example #19
Source File: intent_classification.py    From voice-enabled-chatbot with MIT License 5 votes vote down vote up
def get_wordnet_pos(self, word):
    """Map POS tag to first character lemmatize() accepts"""
    initial = nltk.pos_tag([word])[0][1][0].upper()
    if initial == "J":
        return wordnet.ADJ
    if initial == "N":
        return wordnet.NOUN
    if initial == "V":
        return wordnet.VERB
    if initial == "R":
        return wordnet.ADV
    # Anything else is treated as a noun.
    return wordnet.NOUN
Example #20
Source File: dcs.py    From broca with MIT License 5 votes vote down vote up
def _disambiguate_doc(self, tagged_tokens):
    """
    Takes a list of tagged tokens, representing a document,
    in the form:

        [(token, tag), ...]

    And returns a mapping of terms to their disambiguated concepts (synsets).
    """
    # Group tokens by PoS; tags outside the four WordNet categories are dropped.
    pos_groups = {pos: [] for pos in [wn.NOUN, wn.VERB, wn.ADJ, wn.ADV]}
    for tok, tag in tagged_tokens:
        if tag in pos_groups:
            pos_groups[tag].append(tok)

    # Final term -> concept mapping. Renamed from `map`, which shadowed
    # the builtin of the same name.
    term_concepts = {}
    for tag, toks in pos_groups.items():
        term_concepts.update(self._disambiguate_pos(toks, tag))

    return term_concepts
Example #21
Source File: word_sentence_utils.py    From resilient-community-apps with MIT License 5 votes vote down vote up
def _convert(nlk_tag):
    """
    Convert nlk tag to wordnet flag
    :param nlk_tag:
    :return:
    """
    # 'N'-prefixed and unrecognized tags both resolve to NOUN,
    # matching the original if/elif fallthrough.
    return {'J': wordnet.ADJ, 'V': wordnet.VERB,
            'R': wordnet.ADV}.get(nlk_tag[:1], wordnet.NOUN)
Example #22
Source File: sick_extender.py    From Sentence-similarity-classifier-for-pyTorch with MIT License 5 votes vote down vote up
def get_wordnet_pos(treebank_tag):
    """ Converts a Penn Tree-Bank part of speech tag into a corresponding WordNet-friendly tag. 
    Borrowed from: http://stackoverflow.com/questions/15586721/wordnet-lemmatization-and-pos-tagging-in-python. """
    initial = treebank_tag[:1]
    if initial in ('J', 'A'):
        return wordnet.ADJ
    if initial == 'V':
        return wordnet.VERB
    if initial == 'N':
        return wordnet.NOUN
    if initial == 'R':
        return wordnet.ADV
    return 'OTHER'
Example #23
Source File: wordnet_app.py    From razzy-spinner with GNU General Public License v3.0 5 votes vote down vote up
def _pos_tuples():
    # Each entry pairs a WordNet POS constant with its code letter and label.
    entries = zip((wn.NOUN, wn.VERB, wn.ADJ, wn.ADV),
                  ('N', 'V', 'J', 'R'),
                  ('noun', 'verb', 'adj', 'adv'))
    return list(entries)
Example #24
Source File: main.py    From tensorflow-XNN with MIT License 5 votes vote down vote up
def get_wordnet_pos(treebank_tag):
    """Map a Penn Treebank tag to a WordNet POS constant (None if unknown)."""
    return {'J': wordnet.ADJ, 'V': wordnet.VERB,
            'N': wordnet.NOUN, 'R': wordnet.ADV}.get(treebank_tag[:1])
Example #25
Source File: wordnet_similarity.py    From jingwei with MIT License 5 votes vote down vote up
def wup_similarity(tagx, tagy):
    """Collect Wu-Palmer similarities between the first synsets of two tags
    over every WordNet POS category.

    NOTE(review): the visible body builds ``scores`` but never returns it —
    a trailing ``return`` (e.g. ``return max(scores)``) may have been lost in
    the source extraction; confirm against the original project.
    """
    scores = []
    for pos in [wn.NOUN, wn.VERB, wn.ADJ, wn.ADJ_SAT, wn.ADV]:
        try:
            synsetx = wn.synset('%s.%s.01' % (tagx, pos))
            synsety = wn.synset('%s.%s.01' % (tagy, pos))
            score = synsetx.wup_similarity(synsety)
            # wup_similarity returns None when no common ancestor exists.
            if score is None:
                score = 0
        # BUG FIX: `except Exception, e` is Python 2 syntax and a SyntaxError
        # in Python 3; the bound exception was also unused. Broad catch kept
        # deliberately: missing synsets raise and score as 0 (best-effort).
        except Exception:
            score = 0
        scores.append(score)
Example #26
Source File: test_preprocess_func.py    From tmtoolkit with Apache License 2.0 5 votes vote down vote up
def test_pos_tag_convert_penn_to_wn():
    """Check Penn -> WordNet conversion for known tags and None for the rest."""
    expected = {'JJ': wn.ADJ, 'RB': wn.ADV, 'NN': wn.NOUN, 'VB': wn.VERB}
    for penn_tag, wn_pos in expected.items():
        assert pos_tag_convert_penn_to_wn(penn_tag) == wn_pos

    for tag in ('', 'invalid', None):
        assert pos_tag_convert_penn_to_wn(tag) is None
Example #27
Source File: _common.py    From tmtoolkit with Apache License 2.0 5 votes vote down vote up
def _lemmatize_wrapper_general_patternlib(row, lemmatizer):
    """Wrapper function to lemmatize texts using ``pattern`` package."""
    token, pos = row
    if pos.startswith('NP'):
        # singularize noun
        return lemmatizer.singularize(token)
    if pos.startswith('V'):
        # get infinitive of verb
        return lemmatizer.conjugate(token, lemmatizer.INFINITIVE)
    if pos.startswith(('ADJ', 'ADV')):
        # get baseform of adjective or adverb
        return lemmatizer.predicative(token)
    return token
Example #28
Source File: disintegrator.py    From quantified-self with MIT License 5 votes vote down vote up
def __get_wordnet_pos(self, treebank_tag):
    """Map a Penn Treebank tag prefix to a WordNet POS ("" when unrecognized)."""
    return {"J": wordnet.ADJ, "V": wordnet.VERB,
            "N": wordnet.NOUN, "R": wordnet.ADV}.get(treebank_tag[:1], "")
Example #29
Source File: learn.py    From partisan-discourse with Apache License 2.0 5 votes vote down vote up
def tagwn(self, tag):
    """
    Returns the WordNet tag from the Penn Treebank tag.
    """
    initial = tag[0]
    if initial == 'V':
        return wn.VERB
    if initial == 'R':
        return wn.ADV
    if initial == 'J':
        return wn.ADJ
    # 'N' and any unrecognized tag both resolve to NOUN.
    return wn.NOUN
Example #30
Source File: agglomerative.py    From atap with Apache License 2.0 5 votes vote down vote up
def wnpos(tag):
    # Return the WordNet POS tag from the Penn Treebank tag;
    # unrecognized initials default to NOUN.
    lookup = dict(zip('NVRJ', (wn.NOUN, wn.VERB, wn.ADV, wn.ADJ)))
    return lookup.get(tag[0], wn.NOUN)