Python nltk.corpus.wordnet.all_synsets() Examples

The following are 4 code examples of nltk.corpus.wordnet.all_synsets(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module nltk.corpus.wordnet, or try the search function.
Example #1
Source File: definition_preprocessor.py    From EWISE with Apache License 2.0 6 votes vote down vote up
def process_definitions(self):
    """Index every WordNet synset by name, definition and lemma key.

    Populates the following attributes on ``self``:

    * ``definition_map``: synset name -> whitespace-stripped definition.
    * ``lemmakey_to_synset``: lemma key -> synset name (if a key shows up
      more than once, the last synset seen wins).
    * ``synset_to_idx`` / ``idx_to_synset``: synset name <-> its position
      in the sorted list of synset names.
    * ``definitions``: definition texts listed in that same sorted order.

    Progress and the number of empty definitions are printed to stdout.
    """
    self.definition_map = {}
    self.lemmakey_to_synset = {}
    empty_count = 0
    print ("Processing definitions")
    for synset in tqdm(wn.all_synsets()):
        text = synset.definition().strip()
        if not text:
            empty_count += 1

        synset_name = synset.name()
        self.definition_map[synset_name] = text

        for lemma in synset.lemmas():
            self.lemmakey_to_synset[lemma.key()] = synset_name

    print ("#Empty definitions {}/{}".format(empty_count, len(self.definition_map)))

    # Sorting the names gives each synset a reproducible integer index.
    ordered_names = sorted(self.definition_map)
    self.synset_to_idx = {name: idx for idx, name in enumerate(ordered_names)}
    self.idx_to_synset = {idx: name for name, idx in self.synset_to_idx.items()}
    self.definitions = [self.definition_map[name] for name in ordered_names]
Example #2
Source File: extractors.py    From PPP-QuestionParsing-Grammatical with GNU Affero General Public License v3.0 5 votes vote down vote up
def buildNouns():
    """
        Collect the leading word of every noun synset name in WordNet.

        Each synset name is cut at its first '.', and only the part before
        it is kept; duplicates collapse because the result is a set.
    """
    nouns = set()
    for synset in wn.all_synsets('n'):
        word, _, _ = synset.name().partition('.')
        nouns.add(word)
    return nouns
Example #3
Source File: extractors.py    From PPP-QuestionParsing-Grammatical with GNU Affero General Public License v3.0 5 votes vote down vote up
def buildVerbs():
    """
        Collect the leading word of every verb synset name in WordNet.

        Each synset name is cut at its first ".", and only the part before
        it is kept; duplicates collapse because the result is a set.
    """
    verb_names = (synset.name() for synset in wn.all_synsets("v"))
    return {name.partition(".")[0] for name in verb_names}
Example #4
Source File: extract_wordnet.py    From kb with Apache License 2.0 4 votes vote down vote up
def extract_wordnet_from_nltk(entity_output_file, relation_output_file):
    """Dump NLTK's WordNet as an entity file and a relation file.

    The entity file gets one JSON object per line: a ``'synset'`` node for
    every synset, followed by a ``'lemma'`` node for each of its lemmas.
    The relation file gets one tab-separated ``head<TAB>relation<TAB>tail``
    triple per line, of three kinds:

    * ``synset_<relation>``: synset name -> related synset name, for each
      method name in ``SYNSET_RELATIONSHIP_TYPES``.
    * ``synset_lemma``: synset name -> key of a lemma it contains.
    * ``lemma_<relation>``: lemma key -> related lemma key, for each method
      name in ``LEMMA_RELATIONSHIP_TYPES``.

    Args:
        entity_output_file: path of the JSON-lines node file to (over)write.
        relation_output_file: path of the TSV relation file to (over)write.
    """
    from nltk.corpus import wordnet as wn
    import json

    # each node is a synset or synset+lemma
    # synsets have POS
    # synsets have several lemmas associated with them
    #       each lemma is keyed by something like able%3:00:00::
    #       where string = lemma, first number is POS, then sense id
    #
    # in addition to the synset-synset and lemma-lemma relationships,
    # we will also add synset_lemma relationship for lemmas contained
    # in each synset
    with open(entity_output_file, 'w') as fent, \
         open(relation_output_file, 'w') as frel:

        for synset in wn.all_synsets():
            synset_id = synset.name()
            synset_pos = synset.pos()
            lemmas = synset.lemmas()

            node = {
                'id': synset_id,
                'pos': synset_pos,
                'lemmas': [lem.key() for lem in lemmas],
                'examples': synset.examples(),
                'definition': synset.definition(),
                'type': 'synset',
            }
            fent.write(json.dumps(node) + '\n')

            # synset-synset relationships: each relation is the name of a
            # zero-argument Synset method returning the related synsets
            for relation in SYNSET_RELATIONSHIP_TYPES:
                for rel_synset in getattr(synset, relation)():
                    frel.write('{}\t{}\t{}\n'.format(
                        synset_id, 'synset_' + relation, rel_synset.name()))

            # now get synset-lemma and lemma-lemma relationships
            for lemma in lemmas:
                lemma_id = lemma.key()
                node = {
                    'id': lemma_id,
                    'pos': synset_pos,
                    'synset': synset_id,
                    'type': 'lemma',
                    'count': lemma.count(),
                }
                fent.write(json.dumps(node) + '\n')

                frel.write('{}\t{}\t{}\n'.format(synset_id, 'synset_lemma', lemma_id))

                # lemma-lemma: the head must be the lemma's own key (the
                # 'id' of the lemma node above), not the enclosing synset,
                # otherwise the edge links a synset to a lemma and cannot be
                # told apart between lemmas of the same synset
                for relation in LEMMA_RELATIONSHIP_TYPES:
                    for rel_lemma in getattr(lemma, relation)():
                        frel.write('{}\t{}\t{}\n'.format(
                            lemma_id, 'lemma_' + relation, rel_lemma.key()))