Python nltk.parse() Examples

The following are 15 code examples for showing how to use nltk.parse(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module nltk , or try the search function .

Example 1
Project: razzy-spinner   Author: rafasashi   File: glue.py    License: GNU General Public License v3.0 6 votes vote down vote up
def __init__(self, meaning, glue, indices=None):
        if not indices:
            indices = set()

        if isinstance(meaning, string_types):
            self.meaning = Expression.fromstring(meaning)
        elif isinstance(meaning, Expression):
            self.meaning = meaning
        else:
            raise RuntimeError('Meaning term neither string or expression: %s, %s' % (meaning, meaning.__class__))

        if isinstance(glue, string_types):
            self.glue = linearlogic.LinearLogicParser().parse(glue)
        elif isinstance(glue, linearlogic.Expression):
            self.glue = glue
        else:
            raise RuntimeError('Glue term neither string or expression: %s, %s' % (glue, glue.__class__))

        self.indices = indices 
Example 2
Project: razzy-spinner   Author: rafasashi   File: glue.py    License: GNU General Public License v3.0 6 votes vote down vote up
def dep_parse(self, sentence):
        """
        Return a dependency graph for the sentence.

        :param sentence: the sentence to be parsed
        :type sentence: list(str)
        :rtype: DependencyGraph
        """

        #Lazy-initialize the depparser
        if self.depparser is None:
            from nltk.parse import MaltParser
            self.depparser = MaltParser(tagger=self.get_pos_tagger())
        if not self.depparser._trained:
            self.train_depparser()
        return self.depparser.parse(sentence, verbose=self.verbose) 
Example 3
Project: razzy-spinner   Author: rafasashi   File: glue.py    License: GNU General Public License v3.0 6 votes vote down vote up
def __init__(self, meaning, glue, indices=None):
        if not indices:
            indices = set()

        if isinstance(meaning, string_types):
            self.meaning = drt.DrtExpression.fromstring(meaning)
        elif isinstance(meaning, drt.DrtExpression):
            self.meaning = meaning
        else:
            raise RuntimeError('Meaning term neither string or expression: %s, %s' % (meaning, meaning.__class__))

        if isinstance(glue, string_types):
            self.glue = linearlogic.LinearLogicParser().parse(glue)
        elif isinstance(glue, linearlogic.Expression):
            self.glue = glue
        else:
            raise RuntimeError('Glue term neither string or expression: %s, %s' % (glue, glue.__class__))

        self.indices = indices 
Example 4
Project: luscan-devel   Author: blackye   File: glue.py    License: GNU General Public License v2.0 6 votes vote down vote up
def __init__(self, meaning, glue, indices=None):
        if not indices:
            indices = set()

        if isinstance(meaning, str):
            self.meaning = LogicParser().parse(meaning)
        elif isinstance(meaning, Expression):
            self.meaning = meaning
        else:
            raise RuntimeError, 'Meaning term neither string or expression: %s, %s' % (meaning, meaning.__class__)

        if isinstance(glue, str):
            self.glue = linearlogic.LinearLogicParser().parse(glue)
        elif isinstance(glue, linearlogic.Expression):
            self.glue = glue
        else:
            raise RuntimeError, 'Glue term neither string or expression: %s, %s' % (glue, glue.__class__)

        self.indices = indices 
Example 5
Project: luscan-devel   Author: blackye   File: glue.py    License: GNU General Public License v2.0 6 votes vote down vote up
def __init__(self, meaning, glue, indices=None):
        if not indices:
            indices = set()

        if isinstance(meaning, str):
            self.meaning = drt.DrtParser().parse(meaning)
        elif isinstance(meaning, drt.AbstractDrs):
            self.meaning = meaning
        else:
            raise RuntimeError, 'Meaning term neither string or expression: %s, %s' % (meaning, meaning.__class__)

        if isinstance(glue, str):
            self.glue = linearlogic.LinearLogicParser().parse(glue)
        elif isinstance(glue, linearlogic.Expression):
            self.glue = glue
        else:
            raise RuntimeError, 'Glue term neither string or expression: %s, %s' % (glue, glue.__class__)

        self.indices = indices 
Example 6
Project: ai-research-keyphrase-extraction   Author: swisscom   File: postagging.py    License: Apache License 2.0 6 votes vote down vote up
def pos_tag_raw_text(self, text, as_tuple_list=True):
        # Unfortunately for the moment there is no method to do sentence split + pos tagging in nltk.parse.corenlp
        # Ony raw_tag_sents is available but assumes a list of str (so it assumes the sentence are already split)
        # We create a small custom function highly inspired from raw_tag_sents to do both

        def raw_tag_text():
            """
            Perform tokenizing sentence splitting and PosTagging and keep the 
            sentence splits structure
            """
            properties = {'annotators':'tokenize,ssplit,pos'}
            tagged_data = self.parser.api_call(text, properties=properties)
            for tagged_sentence in tagged_data['sentences']:
                yield [(token['word'], token['pos']) for token in tagged_sentence['tokens']]
        
        tagged_text = list(raw_tag_text())

        if as_tuple_list:
            return tagged_text
        return '[ENDSENT]'.join(
            [' '.join([tuple2str(tagged_token, self.separator) for tagged_token in sent]) for sent in tagged_text]) 
Example 7
Project: V1EngineeringInc-Docs   Author: V1EngineeringInc   File: glue.py    License: Creative Commons Attribution Share Alike 4.0 International 6 votes vote down vote up
def __init__(self, meaning, glue, indices=None):
        if not indices:
            indices = set()

        if isinstance(meaning, string_types):
            self.meaning = Expression.fromstring(meaning)
        elif isinstance(meaning, Expression):
            self.meaning = meaning
        else:
            raise RuntimeError(
                'Meaning term neither string or expression: %s, %s'
                % (meaning, meaning.__class__)
            )

        if isinstance(glue, string_types):
            self.glue = linearlogic.LinearLogicParser().parse(glue)
        elif isinstance(glue, linearlogic.Expression):
            self.glue = glue
        else:
            raise RuntimeError(
                'Glue term neither string or expression: %s, %s'
                % (glue, glue.__class__)
            )

        self.indices = indices 
Example 8
Project: V1EngineeringInc-Docs   Author: V1EngineeringInc   File: glue.py    License: Creative Commons Attribution Share Alike 4.0 International 6 votes vote down vote up
def dep_parse(self, sentence):
        """
        Return a dependency graph for the sentence.

        :param sentence: the sentence to be parsed
        :type sentence: list(str)
        :rtype: DependencyGraph
        """

        # Lazy-initialize the depparser
        if self.depparser is None:
            from nltk.parse import MaltParser

            self.depparser = MaltParser(tagger=self.get_pos_tagger())
        if not self.depparser._trained:
            self.train_depparser()
        return self.depparser.parse(sentence, verbose=self.verbose) 
Example 9
Project: V1EngineeringInc-Docs   Author: V1EngineeringInc   File: glue.py    License: Creative Commons Attribution Share Alike 4.0 International 6 votes vote down vote up
def __init__(self, meaning, glue, indices=None):
        if not indices:
            indices = set()

        if isinstance(meaning, string_types):
            self.meaning = drt.DrtExpression.fromstring(meaning)
        elif isinstance(meaning, drt.DrtExpression):
            self.meaning = meaning
        else:
            raise RuntimeError(
                'Meaning term neither string or expression: %s, %s'
                % (meaning, meaning.__class__)
            )

        if isinstance(glue, string_types):
            self.glue = linearlogic.LinearLogicParser().parse(glue)
        elif isinstance(glue, linearlogic.Expression):
            self.glue = glue
        else:
            raise RuntimeError(
                'Glue term neither string or expression: %s, %s'
                % (glue, glue.__class__)
            )

        self.indices = indices 
Example 10
Project: razzy-spinner   Author: rafasashi   File: glue.py    License: GNU General Public License v3.0 5 votes vote down vote up
def demo(show_example=-1):
    from nltk.parse import MaltParser
    examples = ['David sees Mary',
                'David eats a sandwich',
                'every man chases a dog',
                'every man believes a dog sleeps',
                'John gives David a sandwich',
                'John chases himself']
#                'John persuades David to order a pizza',
#                'John tries to go',
#                'John tries to find a unicorn',
#                'John seems to vanish',
#                'a unicorn seems to approach',
#                'every big cat leaves',
#                'every gray cat leaves',
#                'every big gray cat leaves',
#                'a former senator leaves',

    print('============== DEMO ==============')

    tagger = RegexpTagger(
        [('^(David|Mary|John)$', 'NNP'),
         ('^(sees|eats|chases|believes|gives|sleeps|chases|persuades|tries|seems|leaves)$', 'VB'),
         ('^(go|order|vanish|find|approach)$', 'VB'),
         ('^(a)$', 'ex_quant'),
         ('^(every)$', 'univ_quant'),
         ('^(sandwich|man|dog|pizza|unicorn|cat|senator)$', 'NN'),
         ('^(big|gray|former)$', 'JJ'),
         ('^(him|himself)$', 'PRP')
    ])

    depparser = MaltParser(tagger=tagger)
    glue = Glue(depparser=depparser, verbose=False)

    for (i, sentence) in enumerate(examples):
        if i==show_example or show_example==-1:
            print('[[[Example %s]]]  %s' % (i, sentence))
            for reading in glue.parse_to_meaning(sentence.split()):
                print(reading.simplify())
            print('') 
Example 11
Project: luscan-devel   Author: blackye   File: viterbi.py    License: GNU General Public License v2.0 5 votes vote down vote up
def __init__(self, grammar, trace=0):
        """
        Create a new ``ViterbiParser`` parser, that uses ``grammar`` to
        parse texts.

        :type grammar: WeightedGrammar
        :param grammar: The grammar used to parse texts.
        :type trace: int
        :param trace: The level of tracing that should be used when
            parsing a text.  ``0`` will generate no tracing output;
            and higher numbers will produce more verbose tracing
            output.
        """
        self._grammar = grammar
        self._trace = trace 
Example 12
Project: luscan-devel   Author: blackye   File: viterbi.py    License: GNU General Public License v2.0 5 votes vote down vote up
def parse(self, tokens):
        # Inherit docs from ParserI

        tokens = list(tokens)
        self._grammar.check_coverage(tokens)

        # The most likely constituent table.  This table specifies the
        # most likely constituent for a given span and type.
        # Constituents can be either Trees or tokens.  For Trees,
        # the "type" is the Nonterminal for the tree's root node
        # value.  For Tokens, the "type" is the token's type.
        # The table is stored as a dictionary, since it is sparse.
        constituents = {}

        # Initialize the constituents dictionary with the words from
        # the text.
        if self._trace: print ('Inserting tokens into the most likely'+
                               ' constituents table...')
        for index in range(len(tokens)):
            token = tokens[index]
            constituents[index,index+1,token] = token
            if self._trace > 1:
                self._trace_lexical_insertion(token, index, len(tokens))

        # Consider each span of length 1, 2, ..., n; and add any trees
        # that might cover that span to the constituents dictionary.
        for length in range(1, len(tokens)+1):
            if self._trace:
                print ('Finding the most likely constituents'+
                       ' spanning %d text elements...' % length)
            for start in range(len(tokens)-length+1):
                span = (start, start+length)
                self._add_constituents_spanning(span, constituents,
                                                tokens)

        # Return the tree that spans the entire text & have the right cat
        return constituents.get((0, len(tokens), self._grammar.start())) 
Example 13
Project: luscan-devel   Author: blackye   File: glue.py    License: GNU General Public License v2.0 5 votes vote down vote up
def dep_parse(self, sentence='every cat leaves'.split()):
        #Lazy-initialize the depparser
        if self.depparser is None:
            from nltk.parse import MaltParser
            self.depparser = MaltParser(tagger=self.get_pos_tagger())
        if not self.depparser._trained:
            self.train_depparser()

        return [self.depparser.parse(sentence, verbose=self.verbose)] 
Example 14
Project: py-nltk-svo   Author: klintan   File: svo.py    License: MIT License 4 votes vote down vote up
def process_parse_tree(self, parse_tree):
        """
        Returns the Subject-Verb-Object Representation of a Parse Tree.
        Can Vary depending on number of 'sub-sentences' in a Parse Tree
        """
        self.tree_root = parse_tree
        # Step 1 - Extract all the parse trees that start with 'S'
        output_list = []
        output_dict = {}

        for idx, subtree in enumerate(parse_tree[0].subtrees()):
            subject = None
            predicate = None
            Object = None
            if subtree.label() in ["S", "SQ", "SBAR", "SBARQ", "SINV", "FRAG"]:
                children_list = subtree
                children_values = [each_child.label() for each_child in children_list]
                children_dict = dict(zip(children_values, children_list))

                # Extract Subject, Verb-Phrase, Objects from Sentence sub-trees
                if children_dict.get("NP") is not None:
                    subject = self.get_subject(children_dict["NP"])

                if children_dict.get("VP") is not None:
                    # Extract Verb and Object
                    # i+=1
                    # """
                    # if i==1:
                    #    pdb.set_trace()
                    # """
                    predicate = self.get_predicate(children_dict["VP"])
                    Object = self.get_object(children_dict["VP"])

                try:
                    if subject['subject'] and predicate['predicate'] and Object['object']:
                        output_dict['subject'] = subject['subject']
                        output_dict['predicate'] = predicate['predicate']
                        output_dict['object'] = Object['object']
                        output_list.append(output_dict)
                except Exception as e:
                    print(e)
                    continue

        return output_list 
Example 15
Project: V1EngineeringInc-Docs   Author: V1EngineeringInc   File: glue.py    License: Creative Commons Attribution Share Alike 4.0 International 4 votes vote down vote up
def demo(show_example=-1):
    from nltk.parse import MaltParser

    examples = [
        'David sees Mary',
        'David eats a sandwich',
        'every man chases a dog',
        'every man believes a dog sleeps',
        'John gives David a sandwich',
        'John chases himself',
    ]
    #                'John persuades David to order a pizza',
    #                'John tries to go',
    #                'John tries to find a unicorn',
    #                'John seems to vanish',
    #                'a unicorn seems to approach',
    #                'every big cat leaves',
    #                'every gray cat leaves',
    #                'every big gray cat leaves',
    #                'a former senator leaves',

    print('============== DEMO ==============')

    tagger = RegexpTagger(
        [
            ('^(David|Mary|John)$', 'NNP'),
            (
                '^(sees|eats|chases|believes|gives|sleeps|chases|persuades|tries|seems|leaves)$',
                'VB',
            ),
            ('^(go|order|vanish|find|approach)$', 'VB'),
            ('^(a)$', 'ex_quant'),
            ('^(every)$', 'univ_quant'),
            ('^(sandwich|man|dog|pizza|unicorn|cat|senator)$', 'NN'),
            ('^(big|gray|former)$', 'JJ'),
            ('^(him|himself)$', 'PRP'),
        ]
    )

    depparser = MaltParser(tagger=tagger)
    glue = Glue(depparser=depparser, verbose=False)

    for (i, sentence) in enumerate(examples):
        if i == show_example or show_example == -1:
            print('[[[Example %s]]]  %s' % (i, sentence))
            for reading in glue.parse_to_meaning(sentence.split()):
                print(reading.simplify())
            print('')