Python nltk.Tree() Examples

The following are 30 code examples showing how to use nltk.Tree(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module nltk, or try the search function.

Example 1
Project: razzy-spinner   Author: rafasashi   File: regexp.py    License: GNU General Public License v3.0 6 votes vote down vote up
def parse(self, chunk_struct, trace=None):
        """
        Run every stage of this cascaded chunk parser over the input.

        The parsers in ``self._stages`` are applied in order, each one
        receiving the result of the previous one, and the whole sequence
        is repeated ``self._loop`` times.

        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
        :type trace: int
        :param trace: the tracing level forwarded to every stage.  When
            ``None``, the level stored on the instance (``self._trace``)
            is used instead.
        :return: the chunk structure returned by the last stage.
        :rtype: Tree
        """
        level = self._trace if trace is None else trace
        for _ in range(self._loop):
            for stage in self._stages:
                chunk_struct = stage.parse(chunk_struct, trace=level)
        return chunk_struct
Example 2
Project: gap   Author: sattree   File: utils.py    License: MIT License 6 votes vote down vote up
def get_edges_in_tree(parent, leaves=None, path='', edges=None, lrb_rrb_fix=False):
    """Collect the edges and leaves of a tree with a depth-first traversal.

    Subtrees are identified by ``'<path>-<label>-<child index>'`` and leaves by
    ``'<leaf>-<number of leaves collected so far>'``.

    Args:
        parent: Iterable of children (``nltk.Tree`` subtrees or leaf values).
        leaves: List that leaf values are appended to. A new list is created
            when omitted.
        path: Identifier of ``parent``; used as the source of every edge
            emitted for its direct children.
        edges: List that ``(from_node, to_node)`` tuples are appended to. A
            new list is created when omitted.
        lrb_rrb_fix: When true, ``'('`` / ``')'`` is appended to ``leaves``
            for each direct child labelled ``-LRB-`` / ``-RRB-``.

    Returns:
        The tuple ``(edges, leaves)``; these are the same list objects that
        were passed in (or created for this call).
    """
    # Create the accumulators per call: mutable default arguments are shared
    # between calls and would leak results from one tree into the next.
    if leaves is None:
        leaves = []
    if edges is None:
        edges = []

    for i, node in enumerate(parent):
        # isinstance() also recognises Tree subclasses as subtrees.
        if isinstance(node, nltk.Tree):
            to_node = '{}-{}-{}'.format(path, node.label(), i)
            edges.append((path, to_node))

            if lrb_rrb_fix:
                label = node.label()
                if label == '-LRB-':
                    leaves.append('(')
                if label == '-RRB-':
                    leaves.append(')')

            # NOTE(review): lrb_rrb_fix is not forwarded, so it only affects
            # direct children of the outermost call -- confirm this is intended.
            edges, leaves = get_edges_in_tree(node, leaves, to_node, edges)
        else:
            # The running leaf count keeps repeated words distinguishable.
            to_node = '{}-{}'.format(node, len(leaves))
            edges.append((path, to_node))
            leaves.append(node)
    return edges, leaves
Example 3
Project: self-attentive-parser   Author: nikitakit   File: nltk_plugin.py    License: MIT License 6 votes vote down vote up
def parse_sents(self, sents):
        """
        Lazily parse a collection of sentences.

        A plain string is first split into sentences with
        ``nltk.sent_tokenize`` using ``self._tokenizer_lang``; any other
        value is handed on unchanged, one element per sentence.

        sents (str or Iterable[str] or Iterable[List[str]]): sentences to parse

        Raises: ValueError if "sents" is a string and no tokenizer language
        is configured. Because this is a generator, the error surfaces when
        iteration starts, not when the method is called.

        Returns: Iter[nltk.Tree]
        """
        needs_splitting = isinstance(sents, STRING_TYPES)
        if needs_splitting and self._tokenizer_lang is None:
            raise ValueError(
                "No tokenizer available for this language. "
                "Please split into individual sentences and tokens "
                "before calling the parser."
                )
        if needs_splitting:
            sents = nltk.sent_tokenize(sents, self._tokenizer_lang)

        prepared = self._nltk_process_sents(sents)
        for raw_parse, raw_tags, sent in self._batched_parsed_raw(prepared):
            # raw_parse is unpacked into the remaining positional arguments
            # of _make_nltk_tree.
            yield self._make_nltk_tree(sent, raw_tags, *raw_parse)
Example 4
Project: magnitude   Author: plasticityai   File: constituency_parser.py    License: MIT License 6 votes vote down vote up
def decode(self, output_dict):
        u"""
        Build trees for a batch and strip padding from the per-span outputs.

        The trees are produced by ``self.construct_trees`` from the class
        probabilities, spans, span counts, tokens and POS tags found in
        ``output_dict`` and stored under ``"trees"``.  POS tags are passed
        as ``None`` unless every entry of ``output_dict["pos_tags"]`` is
        truthy.

        ``"spans"`` and ``"class_probabilities"`` are then replaced by
        per-example lists, each cut down to the first ``num_spans[i]``
        entries so that padded spans and distributions are removed.

        Returns the same ``output_dict`` object, updated in place.
        """
        class_probs = output_dict[u'class_probabilities'].cpu().data
        span_tensor = output_dict[u"spans"].cpu().data

        sentences = output_dict[u"tokens"]
        pos_tags = output_dict[u"pos_tags"]
        if not all(pos_tags):
            pos_tags = None
        span_counts = output_dict[u"num_spans"].data
        trees = self.construct_trees(class_probs, span_tensor, span_counts, sentences, pos_tags)

        trimmed_spans = []
        trimmed_probs = []
        for i in range(class_probs.size(0)):
            count = span_counts[i]
            trimmed_spans.append(span_tensor[i, :count])
            trimmed_probs.append(class_probs[i, :count, :])
        output_dict[u"spans"] = trimmed_spans
        output_dict[u"class_probabilities"] = trimmed_probs

        output_dict[u"trees"] = trees
        return output_dict
Example 5
Project: luscan-devel   Author: blackye   File: regexp.py    License: GNU General Public License v2.0 6 votes vote down vote up
def parse(self, chunk_struct, trace=None):
        """
        Feed the input through all chunking stages, repeatedly.

        One pass hands the structure to each parser in ``self._stages``
        in turn; ``self._loop`` such passes are made, and every stage
        works on the output of the one before it.

        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
        :type trace: int
        :param trace: the tracing level given to each stage; ``None``
            selects the instance default, ``self._trace``.
        :return: the output of the final stage of the final pass.
        :rtype: Tree
        """
        effective_trace = trace if trace is not None else self._trace
        for _pass in range(self._loop):
            for chunker in self._stages:
                chunk_struct = chunker.parse(chunk_struct, trace=effective_trace)
        return chunk_struct
Example 6
Project: tetre   Author: aoldoni   File: tree_utils.py    License: MIT License 6 votes vote down vote up
def to_nltk_tree_general(node, attr_list=("dep_", "pos_"), level=99999):
    """Convert a dependency node and its descendants into an NLTK tree.

    The label of every node is built by joining the listed node attributes
    with ``"/"``.

    Args:
        node: The node at which the conversion starts. It must expose the
            attributes in ``attr_list`` plus ``n_lefts``, ``n_rights`` and
            ``children``.
        attr_list: Names of the node attributes that make up the label.
        level: How many more levels to descend; at ``0`` the node is returned
            as a bare label even if it has children.

    Returns:
        An ``nltk.Tree`` for a node with children, otherwise the label string.
    """
    label = "/".join(getattr(node, name) for name in attr_list)

    # Depth exhausted or nothing below this node: the label stands alone.
    if level == 0 or not (node.n_lefts + node.n_rights > 0):
        return label

    subtrees = [to_nltk_tree_general(child, attr_list, level - 1) for child in node.children]
    return Tree(label, subtrees)
Example 7
Project: tetre   Author: aoldoni   File: tree_utils.py    License: MIT License 6 votes vote down vote up
def get_node_representation(tetre_format, token):
    """Build a one-level NLTK tree describing a token and its direct children.

    The root is labelled with the token's ``pos_``; each child is rendered by
    ``to_nltk_tree_general`` at depth 0, i.e. as a label only.

    Args:
        tetre_format: Comma-separated names of the child attributes that make
            up each child's label.
        token: The node to represent. It must expose ``pos_``, ``n_lefts``,
            ``n_rights`` and ``children``.

    Returns:
        A NLTK Tree (nltk.tree); it has no children when the token has none.
    """
    attribute_names = tetre_format.split(",")
    root_label = token.pos_

    has_children = token.n_lefts + token.n_rights > 0
    if has_children:
        child_labels = [
            to_nltk_tree_general(child, attr_list=attribute_names, level=0)
            for child in token.children
        ]
    else:
        child_labels = []

    return Tree(root_label, child_labels)
Example 8
Project: tetre   Author: aoldoni   File: tree_utils.py    License: MIT License 6 votes vote down vote up
def nltk_tree_to_qtree(tree):
    """Render an NLTK Tree as a QTREE string (a bracketed text form of a tree).

    For details, please see: http://www.ling.upenn.edu/advice/latex/qtree/qtreenotes.pdf

    Args:
        tree: The NLTK Tree (nltk.tree), or a leaf value, which is rendered
            with ``str()``.

    Returns:
        A string with the QTREE representation of the NLTK Tree (nltk.tree).
    """
    if isinstance(tree, Tree):
        body = " " + tree.label() + " "
        if len(tree) > 0:
            # Children are emitted in sorted order, not in tree order.
            body += " ".join(nltk_tree_to_qtree(child) for child in sorted(tree))
    else:
        body = " " + str(tree) + " "

    return " [ " + body + " ] "
Example 9
Project: allennlp-semparse   Author: allenai   File: domain_language.py    License: Apache License 2.0 6 votes vote down vote up
def nltk_tree_to_logical_form(tree: Tree) -> str:
    """
    Turn an ``nltk.Tree`` describing the derivation of a logical form into the (lisp-like)
    logical form string itself.

    A node without children is written as its own label, a node with exactly one child is
    written as that child's label, and any other node becomes a parenthesised,
    space-separated list of its recursively rendered children.

    This is used in the logic that converts action sequences back into logical forms.
    """
    # nltk.Tree subclasses `list`, so `len()` is the number of children. The length is checked
    # explicitly, not through truthiness, so the result cannot depend on how nltk defines the
    # truth value of a tree.
    child_count = len(tree)
    if child_count == 0:
        return tree.label()
    if child_count == 1:
        return tree[0].label()
    rendered_children = [nltk_tree_to_logical_form(child) for child in tree]
    return "(" + " ".join(rendered_children) + ")"
Example 10
Project: PRPN-Analysis   Author: nyu-mll   File: data_nli.py    License: MIT License 6 votes vote down vote up
def add_words(self, file_name):
        """Add the words of both parsed sentences on every line of a file to the dictionary.

        Each non-blank line is evaluated to a mapping; the bracketed parses
        stored under ``'sentence1_parse'`` and ``'sentence2_parse'`` are read
        with ``Tree.fromstring``, reduced to words by ``self.filter_words``,
        wrapped in ``'<s>'`` / ``'</s>'`` markers and added word by word
        through ``self.dictionary.add_word``.

        :param file_name: path of the file to read, one record per line.
        """
        # The context manager closes the file even when a line fails to parse.
        with open(file_name, 'r') as f_in:
            for line in f_in:
                if line.strip() == '':
                    continue
                # NOTE(security): eval() executes arbitrary code found in the
                # file. Only use this on trusted data; if the records are
                # plain JSON, json.loads would be the safe replacement.
                data = eval(line)
                for key in ('sentence1_parse', 'sentence2_parse'):
                    sen_tree = Tree.fromstring(data[key])
                    words = ['<s>'] + self.filter_words(sen_tree) + ['</s>']
                    for word in words:
                        self.dictionary.add_word(word)
Example 11
Project: hobbs   Author: cmward   File: hobbs.py    License: MIT License 6 votes vote down vote up
def bft(tree):
    """ Perform a breadth-first traversal of a tree.
    Return the nodes in a list in level-order.

    Only children that are ``nltk.Tree`` instances are visited; leaves are
    not included in the result.

    Args:
        tree: a tree node
    Returns:
        lst: a list of tree nodes in left-to-right level-order
    """
    # A deque is the plain FIFO for single-threaded traversal: it needs no
    # locking and is available under the same name on Python 2 and 3.
    from collections import deque

    lst = []
    pending = deque([tree])
    while pending:
        node = pending.popleft()
        lst.append(node)
        pending.extend(child for child in node if isinstance(child, nltk.Tree))
    return lst
Example 12
Project: hobbs   Author: cmward   File: hobbs.py    License: MIT License 6 votes vote down vote up
def traverse_tree(tree, pro):
    """ Traverse a tree in a left-to-right, breadth-first manner,
    proposing any NP encountered as an antecedent. Returns the
    tree and the position of the first possible antecedent, or
    ``(None, None)`` when no node qualifies.

    Args:
        tree: the tree being searched
        pro: the pronoun being resolved (string)
    """
    # A deque is the plain FIFO for single-threaded traversal: it needs no
    # locking and is available under the same name on Python 2 and 3.
    from collections import deque

    pending = deque([tree])
    while pending:
        node = pending.popleft()
        # Any node whose label contains "NP" is a candidate antecedent.
        if "NP" in node.label():
            # Computed once and reused; assumes get_pos has no side effects.
            pos = get_pos(tree, node)
            if match(tree, pos, pro):
                return tree, pos
        pending.extend(child for child in node if isinstance(child, nltk.Tree))
    # if no antecedent is found, return None
    return None, None
Example 13
Project: Lango   Author: ayoungprogrammer   File: matcher.py    License: GNU General Public License v2.0 6 votes vote down vote up
def match_rules_context(tree, rules, parent_context={}):
    """Match a Tree against nested rules and return the resulting context.

    Templates are tried in the iteration order of ``rules``. For the first
    template that matches, each key of its nested rules is matched
    recursively against the sub-structure captured under that key, and the
    child results are merged into the context.

    Args:
        tree (Tree): Parsed tree structure
        rules (dict): See match_rules
        parent_context (dict): Context of parent call
    Returns:
        dict: Context matched dictionary of matched rules or
        None if no match. None is also returned as soon as a nested rule of
        the first matching template yields no context.
    """
    # The shared default dict is safe here: it is only copied, never mutated.
    for template, nested_rules in rules.items():
        context = parent_context.copy()
        if not match_template(tree, template, context):
            continue
        for key, child_rules in nested_rules.items():
            child_context = match_rules_context(context[key], child_rules, context)
            if not child_context:
                return None
            context.update(child_context)
        return context
    return None
Example 14
Project: Lango   Author: ayoungprogrammer   File: matcher.py    License: GNU General Public License v2.0 6 votes vote down vote up
def match_rules_context_multi(tree, rules, parent_context={}):
    """Match a Tree against nested rules and return every resulting context.

    Every template in ``rules`` is tried. A matching template without nested
    rules contributes its context directly; otherwise the context lists
    obtained for each nested key are combined with ``cross_context``.

    Args:
        tree (Tree): Parsed tree structure
        rules (dict): See match_rules
        parent_context (dict): Context of parent call
    Returns:
        list: All matched contexts; empty if nothing matched
    """
    # The shared default dict is safe here: it is only copied, never mutated.
    all_contexts = []
    for template, nested_rules in rules.items():
        context = parent_context.copy()
        if not match_template(tree, template, context):
            continue
        if not nested_rules:
            all_contexts.append(context)
            continue
        per_key_contexts = [
            match_rules_context_multi(context[key], child_rules, context)
            for key, child_rules in nested_rules.items()
        ]
        all_contexts.extend(cross_context(per_key_contexts))
    return all_contexts
Example 15
Project: Lango   Author: ayoungprogrammer   File: matcher.py    License: GNU General Public License v2.0 6 votes vote down vote up
def match_template(tree, template, args=None):
    """Check whether a template string matches a Tree structure

    Args:
        tree (Tree): Parsed Tree structure of a sentence
        template (str): String template to match. Example: "( S ( NP ) )"
        args (dict): Optional mapping that receives the values captured
            during a successful match; left untouched when there is no match
    Returns:
        bool: If they match or not
    """
    tokens = get_tokens(template.split())
    captured = {}
    if not match_tokens(tree, tokens, captured):
        return False

    # Captures go into a scratch dict first, so a failed match can never
    # leave partial results in the caller's mapping.
    if args is not None:
        for name in captured:
            args[name] = captured[name]
    logger.debug('MATCHED: {0}'.format(template))
    return True
Example 16
Project: Lango   Author: ayoungprogrammer   File: matcher.py    License: GNU General Public License v2.0 6 votes vote down vote up
def get_object(tree):
    """Get the object text contained in a tree.

    Subtrees labelled ``DT`` or ``POS`` contribute nothing, which drops
    words such as::

        the
        a/an
        's

    Args:
        tree (Tree): Parsed tree structure, or a leaf value
    Returns:
        Resulting string of tree ``(Ex: "red car")``; a value that is not a
        Tree is returned unchanged
    """
    if not isinstance(tree, Tree):
        return tree
    if tree.label() in ('DT', 'POS'):
        return ''
    pieces = [get_object(child) for child in tree]
    # Empty pieces are dropped so the result has no doubled spaces.
    return ' '.join(filter(None, pieces))
Example 17
Project: props   Author: gabrielStanovsky   File: tree.py    License: MIT License 6 votes vote down vote up
def _to_nltk_format(self):
        """Convert this node and its descendants into a nested ``nltk.Tree``.

        The result is a tree labelled with ``self.parent_relation`` whose
        single child is a tree labelled with ``self.pos``; that inner tree
        holds ``self.word`` followed by the converted children.
        """
        from nltk import Tree

        relation = self.parent_relation
        pos_tag = self.pos
        inner_children = [self.word]
        inner_children.extend(child._to_nltk_format() for child in self.children)
        return Tree(relation, [Tree(pos_tag, inner_children)])
                
#         from nltk import Tree
#         label = "({0}) {1} ({2})".format(self.parent_relation,self.word,self.pos)
#         if not self.children:
#             return label
#         return Tree(label,[c._to_nltk_format() for c in self.children])


    # Feature functions, should conform to naming _(PREDICATE/ARGUMENT)_FEATURE_(feature_name)
    # and return a tuple of (value,span)

    #return the head of the 
Example 18
Project: props   Author: gabrielStanovsky   File: tree.py    License: MIT License 6 votes vote down vote up
def find_tree_matches(tree,pat):
    """
    Collect the matches of a pattern at every node of a tree

    The node itself is tried first, then each child subtree in order, so the
    results come out in pre-order. Falsy match results are skipped.

    @type  tree: DepTree
    @param tree: tree in which to search for matches

    @type  pat: nltk.Tree
    @param pat: a pattern to match against tree

    @rtype:  list [unification of pat]
    @return: all possible unification of pat in tree
    """
    own_match = tree.match(pat)
    found = [own_match] if own_match else []
    for subtree in tree.children:
        found += find_tree_matches(subtree, pat)
    return found
Example 19
def parse(self, chunk_struct, trace=None):
        """
        Chunk the input by applying all stages ``self._loop`` times.

        Within one round the parsers in ``self._stages`` run in order,
        and the result of each ``parse`` call becomes the input of the
        next call.

        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
        :type trace: int
        :param trace: the tracing level passed to the stages.  If it is
            ``None``, ``self._trace`` is passed instead.
        :return: the chunk structure produced by the last ``parse`` call.
        :rtype: Tree
        """
        stage_trace = self._trace if trace is None else trace
        current = chunk_struct
        for _round in range(self._loop):
            for stage_parser in self._stages:
                current = stage_parser.parse(current, trace=stage_trace)
        return current
Example 20
Project: razzy-spinner   Author: rafasashi   File: regexp.py    License: GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, chunk_struct, debug_level=1):
        """
        Build a ``ChunkString`` encoding of a chunk structure.

        The root label and a shallow copy of the top-level pieces are
        stored, and the string form is assembled from the tag of every
        piece, each wrapped in angle brackets.

        :type chunk_struct: Tree
        :param chunk_struct: The chunk structure to be further chunked.
        :type debug_level: int
        :param debug_level: The level of debugging which should be
            applied to transformations on the ``ChunkString``.  The
            valid levels are:
                - 0: no checks
                - 1: full check on to_chunkstruct
                - 2: full check on to_chunkstruct and cursory check after
                   each transformation.
                - 3: full check on to_chunkstruct and full check after
                   each transformation.
            Level 1 or higher is recommended; level 3 is advisable with
            non-standard subclasses of ``RegexpChunkRule``.
        """
        self._root_label = chunk_struct.label()
        # Slice copy: later edits to the input do not change the pieces.
        self._pieces = chunk_struct[:]
        joined_tags = '><'.join(map(self._tag, self._pieces))
        self._str = '<{}>'.format(joined_tags)
        self._debug = debug_level
Example 21
Project: razzy-spinner   Author: rafasashi   File: regexp.py    License: GNU General Public License v3.0 5 votes vote down vote up
def _tag(self, tok):
        """
        Return the tag that stands for one piece of a chunk structure.

        :param tok: a tuple, whose second element is the tag, or a
            ``Tree``, whose label is the tag.
        :raise ValueError: if ``tok`` is neither a tuple nor a ``Tree``.
        """
        if isinstance(tok, tuple):
            return tok[1]
        if isinstance(tok, Tree):
            return tok.label()
        raise ValueError('chunk structures must contain tagged '
                         'tokens or trees')
Example 22
Project: razzy-spinner   Author: rafasashi   File: regexp.py    License: GNU General Public License v3.0 5 votes vote down vote up
def to_chunkstruct(self, chunk_label='CHUNK'):
        """
        Return the chunk structure encoded by this ``ChunkString``.

        The string form is split at every brace.  Segments at odd
        positions lie inside a chunk and become a ``Tree`` labelled
        ``chunk_label``; the tokens of the other segments are added to
        the result directly.

        :rtype: Tree
        :raise ValueError: If a transformation has generated an
            invalid chunkstring (checked through ``self._verify`` when
            the debug level is above 0).
        """
        if self._debug > 0:
            self._verify(self._str, 1)

        result = []
        cursor = 0
        for position, segment in enumerate(re.split('[{}]', self._str)):
            # Every '<' opens one tag, so the count is the number of
            # stored pieces covered by this segment.
            token_count = segment.count('<')
            tokens = self._pieces[cursor:cursor + token_count]
            cursor += token_count

            if position % 2:
                result.append(Tree(chunk_label, tokens))
            else:
                result += tokens

        return Tree(self._root_label, result)
Example 23
Project: razzy-spinner   Author: rafasashi   File: regexp.py    License: GNU General Public License v3.0 5 votes vote down vote up
def parse(self, chunk_struct, trace=None):
        """
        Chunk the input by applying this parser's rules to its
        ``ChunkString`` encoding.

        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked.
            An input without a ``label()`` method is first wrapped in a
            ``Tree`` labelled ``self._root_label``.
        :type trace: int
        :param trace: The level of tracing that should be used when
            parsing a text.  A false value applies the rules without
            tracing, a true value applies them with tracing, and a
            value above ``1`` makes the tracing verbose.  ``None``
            selects the level stored in ``self._trace``.
        :rtype: Tree
        :return: the chunk structure built from the transformed
            ``ChunkString``, with chunks labelled ``self._chunk_label``.
            Empty input prints a warning and yields an empty tree
            labelled ``self._root_label``.
        """
        if len(chunk_struct) == 0:
            print('Warning: parsing empty text')
            return Tree(self._root_label, [])

        # Probe for a tree-like input; anything else gets a root node.
        try:
            chunk_struct.label()
        except AttributeError:
            chunk_struct = Tree(self._root_label, chunk_struct)

        effective_trace = self._trace if trace is None else trace

        chunkstr = ChunkString(chunk_struct)
        if effective_trace:
            self._trace_apply(chunkstr, effective_trace > 1)
        else:
            self._notrace_apply(chunkstr)

        return chunkstr.to_chunkstruct(self._chunk_label)
Example 24
Project: self-attentive-parser   Author: nikitakit   File: nltk_plugin.py    License: MIT License 5 votes vote down vote up
def _make_nltk_tree(self, sentence, tags, score, p_i, p_j, p_label):
        """
        Rebuild an ``nltk.Tree`` from the flat output of the decoder.

        ``p_i``, ``p_j`` and ``p_label`` are parallel sequences with one
        entry per span; entries are consumed strictly in order, on the
        assumption that they follow a preorder traversal of the tree.

        sentence: the words of the sentence, or (word, tag) pairs when
            ``self._provides_tags`` is false
        tags: indices into ``self._tag_vocab``; only read when
            ``self._provides_tags`` is true
        score: value stored on the returned tree as ``tree.score``
        p_i, p_j: start and end index of each span
        p_label: index of each span's label in ``self._label_vocab``

        Returns: nltk.Tree
        """
        # The optimized cython decoder implementation doesn't actually
        # generate trees, only scores and span indices. When converting to a
        # tree, we assume that the indices follow a preorder traversal.

        # Python 2 doesn't support "nonlocal", so wrap idx in a list
        idx_cell = [-1]
        def make_tree():
            # Consume the next span; returns a list of trees because a span
            # without a label contributes its children directly.
            idx_cell[0] += 1
            idx = idx_cell[0]
            i, j, label_idx = p_i[idx], p_j[idx], p_label[idx]
            label = self._label_vocab[label_idx]
            if (i + 1) >= j:
                # Single-token span: a tag node over the word.
                if self._provides_tags:
                    word = sentence[i]
                    tag = self._tag_vocab[tags[i]]
                else:
                    word, tag = sentence[i]
                tag = PTB_TOKEN_ESCAPE.get(tag, tag)
                word = PTB_TOKEN_ESCAPE.get(word, word)
                tree = Tree(tag, [word])
                # Wrap from the last sublabel outwards, so label[0] ends up
                # as the outermost node.
                for sublabel in label[::-1]:
                    tree = Tree(sublabel, [tree])
                return [tree]
            else:
                # Preorder: the next two spans are the left and right parts.
                left_trees = make_tree()
                right_trees = make_tree()
                children = left_trees + right_trees
                if label:
                    tree = Tree(label[-1], children)
                    for sublabel in reversed(label[:-1]):
                        tree = Tree(sublabel, [tree])
                    return [tree]
                else:
                    return children

        tree = make_tree()[0]
        tree.score = score

        return tree
Example 25
Project: self-attentive-parser   Author: nikitakit   File: nltk_plugin.py    License: MIT License 5 votes vote down vote up
def parse(self, sentence):
        """
        Parse one sentence and return its tree

        The sentence is wrapped in a one-element list and delegated to
        "parse_sents"; the first result is returned. It can be a list of
        tokens or a string.

        sentence (str or List[str]): sentence to parse

        Returns: nltk.Tree
        """
        trees = list(self.parse_sents([sentence]))
        return trees[0]
Example 26
Project: readAI   Author: ayoungprogrammer   File: readai.py    License: GNU General Public License v2.0 5 votes vote down vote up
def get_word(tree):
    """Return the text under a tree: the words of all children joined by spaces.

    A value that is not a ``Tree`` is returned unchanged.
    """
    if not isinstance(tree, Tree):
        return tree
    return ' '.join(get_word(child) for child in tree)
Example 27
Project: readAI   Author: ayoungprogrammer   File: readai.py    License: GNU General Public License v2.0 5 votes vote down vote up
def flatten_tree(tree):
    """Flatten a tree into a list of its lowest-level subtrees, keeping NPs whole.

    An empty tree gives an empty list. A tree whose first child is not a
    ``Tree`` is returned as a single-element list, and so is a ``Tree``
    labelled "NP". Any other tree is replaced by the concatenated results for
    its children.
    """
    if len(tree) == 0:
        return []
    if not isinstance(tree[0], Tree):
        return [tree]
    if isinstance(tree, Tree) and tree.label() == "NP":
        return [tree]

    flattened = []
    for child in tree:
        flattened += flatten_tree(child)
    return flattened
Example 28
Project: magnitude   Author: plasticityai   File: world.py    License: MIT License 5 votes vote down vote up
def nltk_tree_to_logical_form(tree):
    u"""
    Turn an ``nltk.Tree`` describing the derivation of a logical form into the (lisp-like)
    logical form string itself.

    A node without children is written as its own label, a node with exactly one child is
    written as that child's label, and any other node becomes a parenthesised,
    space-separated list of its recursively rendered children.
    """
    # nltk.Tree subclasses `list`, so `len()` is the number of children. The length is checked
    # explicitly, not through truthiness, so the result cannot depend on how nltk defines the
    # truth value of a tree.
    child_count = len(tree)
    if child_count == 0:
        return tree.label()
    if child_count == 1:
        return tree[0].label()
    rendered_children = [nltk_tree_to_logical_form(child) for child in tree]
    return u'(' + u' '.join(rendered_children) + u')'
Example 29
Project: magnitude   Author: plasticityai   File: world.py    License: MIT License 5 votes vote down vote up
def get_logical_form(self, action_sequence, add_var_function=True):
        u"""
        Reconstruct a logical form string from a sequence of actions, e.g. one decoded by a
        transition based semantic parser.

        Parameters
        ----------
        action_sequence : ``List[str]``
            The sequence of actions as strings (eg.: ``['{START_SYMBOL} -> t', 't -> <e,t>', ...]``).
        add_var_function : ``bool`` (optional)
             Passed on to ``self._construct_node_from_actions``. ``var`` is a special function
             that some languages use within lambda functions to indicate the use of a variable
             (eg.: ``(lambda x (fb:row.row.year (var x)))``); setting this flag adds it to the
             logical form even though it is not present in the action sequence.

        Raises
        ------
        ParsingError
            Re-raised, after logging, when building the tree fails; also raised when actions
            are left over once the tree is complete.
        """
        # Basic outline: the bracketing in the right-hand side of each action is assumed to be
        # the correct bracketing of the logical form, which holds when there is no currying in
        # the action sequence.  So a tree is built from the actions and then printed with
        # brackets around the children of all non-terminal nodes.

        split_actions = [action.split(u" -> ") for action in action_sequence]
        root_production, pending = split_actions[0], split_actions[1:]
        # The right-hand side of the first action labels the root of the tree.
        tree = Tree(root_production[1], [])

        try:
            leftover = self._construct_node_from_actions(tree, pending, add_var_function)
        except ParsingError:
            logger.error(u"Error parsing action sequence: %s", action_sequence)
            raise

        if not leftover:
            return nltk_tree_to_logical_form(tree)

        logger.error(u"Error parsing action sequence: %s", action_sequence)
        logger.error(u"Remaining actions were: %s", leftover)
        raise ParsingError(u"Extra actions in action sequence")
Example 30
Project: address_extraction   Author: bagrii   File: address_extract.py    License: MIT License 5 votes vote down vote up
def parse(self, tagged_sent):
        """Chunk a tagged sentence with ``self.tagger`` and return the result as a tree.

        The tagger output is reshaped and converted with ``conlltags2tree``.
        """
        tagged_chunks = self.tagger.tag(tagged_sent)

        # The tagger yields ((word, tag), iob) pairs; flatten each one into
        # the (word, tag, iob) triplet expected by conlltags2tree.
        iob_triplets = []
        for (word, tag), iob in tagged_chunks:
            iob_triplets.append((word, tag, iob))

        return conlltags2tree(iob_triplets)