Python nltk.Tree() Examples

The following are 30 code examples of nltk.Tree(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module nltk, or try the search function.

Example #1

Source File: matcher.py From Lango with GNU General Public License v2.0

6 votes

def get_object(tree):
    """Flatten a parsed tree into the words of its object phrase.

    Subtrees labelled ``DT`` (determiner) or ``POS`` (possessive ending)
    contribute nothing, so words such as::

        the
        a/an
        's

    are left out of the result.

    Args:
        tree (Tree): Parsed tree structure, or a leaf value
    Returns:
        The remaining words joined by single spaces ``(Ex: "red car")``.
        An argument that is not a ``Tree`` is returned unchanged.
    """
    if not isinstance(tree, Tree):
        # Leaves are handed back as they are.
        return tree
    if tree.label() in ('DT', 'POS'):
        return ''
    pieces = [get_object(subtree) for subtree in tree]
    # Empty pieces are skipped so dropped words leave no doubled spaces.
    return ' '.join(piece for piece in pieces if piece)

Example #2

Source File: nltk_plugin.py From self-attentive-parser with MIT License

6 votes

def parse_sents(self, sents):
    """
    Lazily parse several sentences.

    A string argument is first split into sentences with NLTK's sentence
    tokenizer; any other argument is passed on as an iterable of sentences.

    sents (str or Iterable[str] or Iterable[List[str]]): sentences to parse

    Returns: Iter[nltk.Tree]

    Raises ValueError (on first iteration) when a string is given but no
    tokenizer language is configured.
    """
    needs_segmenting = isinstance(sents, STRING_TYPES)
    if needs_segmenting and self._tokenizer_lang is None:
        raise ValueError(
            "No tokenizer available for this language. "
            "Please split into individual sentences and tokens "
            "before calling the parser."
        )
    if needs_segmenting:
        sents = nltk.sent_tokenize(sents, self._tokenizer_lang)

    prepared = self._nltk_process_sents(sents)
    for raw_parse, raw_tags, sent in self._batched_parsed_raw(prepared):
        yield self._make_nltk_tree(sent, raw_tags, *raw_parse)

Example #3

Source File: regexp.py From razzy-spinner with GNU General Public License v3.0

6 votes

def parse(self, chunk_struct, trace=None):
    """
    Run every stage of this chunk parser over the input, ``self._loop`` times.

    :type chunk_struct: Tree
    :param chunk_struct: the chunk structure to be (further) chunked
        (this tree is modified, and is also returned)
    :type trace: int
    :param trace: The level of tracing that should be used when
        parsing a text.  ``0`` will generate no tracing output;
        ``1`` will generate normal tracing output; and ``2`` or
        higher will generate verbose tracing output.  This value
        overrides the trace level value that was given to the
        constructor.
    :return: the chunked output.
    :rtype: Tree
    """
    # Fall back to the constructor's trace level only when none was given.
    level = self._trace if trace is None else trace
    for _ in range(self._loop):
        for stage in self._stages:
            chunk_struct = stage.parse(chunk_struct, trace=level)
    return chunk_struct

Example #4

Source File: tree.py From props with MIT License

6 votes

def _to_nltk_format(self):
    """Convert this node and its descendants into nested ``nltk.Tree`` objects.

    The outer tree is labelled with ``parent_relation`` and holds a single
    inner tree labelled with ``pos``.  The inner tree's children are this
    node's word followed by the converted form of each child.
    """
    from nltk import Tree

    converted_children = [child._to_nltk_format() for child in self.children]
    pos_node = Tree(self.pos, [self.word] + converted_children)
    return Tree(self.parent_relation, [pos_node])
                
#         from nltk import Tree
#         label = "({0}) {1} ({2})".format(self.parent_relation,self.word,self.pos)
#         if not self.children:
#             return label
#         return Tree(label,[c._to_nltk_format() for c in self.children])


    # Feature functions, should conform to naming _(PREDICATE/ARGUMENT)_FEATURE_(feature_name)
    # and return a tuple of (value,span)

    #return the head of the

Example #5

Source File: utils.py From gap with MIT License

6 votes

def get_edges_in_tree(parent, leaves=None, path='', edges=None, lrb_rrb_fix=False):
    """Collect the edges and leaves of an nltk tree in depth-first order.

    Each child of ``parent`` yields one ``(from_node, to_node)`` edge, where
    ``from_node`` is ``path``.  Subtrees are named ``"<path>-<label>-<index>"``
    and are descended into; leaves are named ``"<leaf>-<leaf count so far>"``.

    Args:
        parent: Iterable of children (``nltk.Tree`` subtrees or leaf values).
        leaves: List extended in place with the leaves found.  A new list is
            created when omitted.
        path: Name of ``parent``, used as the source of every edge added here.
        edges: List extended in place with the edges found.  A new list is
            created when omitted.
        lrb_rrb_fix: When true, a child labelled ``-LRB-`` / ``-RRB-`` also
            appends ``'('`` / ``')'`` to ``leaves`` before it is descended.

    Returns:
        tuple: ``(edges, leaves)`` - the same list objects that were extended.
    """
    # The accumulators used to be mutable default arguments, so results from
    # one call leaked into every later call that relied on the defaults.
    if leaves is None:
        leaves = []
    if edges is None:
        edges = []

    for i, node in enumerate(parent):
        if isinstance(node, nltk.Tree):
            to_node = '{}-{}-{}'.format(path, node.label(), i)
            edges.append((path, to_node))

            if lrb_rrb_fix:
                if node.label() == '-LRB-':
                    leaves.append('(')
                if node.label() == '-RRB-':
                    leaves.append(')')

            # NOTE(review): lrb_rrb_fix is not forwarded, so the bracket fix
            # only applies to direct children of the top-level call.  Kept
            # as in the original - confirm whether that is intended.
            get_edges_in_tree(node, leaves, to_node, edges)
        else:
            to_node = '{}-{}'.format(node, len(leaves))
            edges.append((path, to_node))
            leaves.append(node)
    return edges, leaves

Example #6

Source File: regexp.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International

6 votes

def parse(self, chunk_struct, trace=None):
    """
    Feed the input through each parsing stage in order, repeating the
    whole sequence ``self._loop`` times.

    :type chunk_struct: Tree
    :param chunk_struct: the chunk structure to be (further) chunked
        (this tree is modified, and is also returned)
    :type trace: int
    :param trace: The level of tracing that should be used when
        parsing a text.  ``0`` will generate no tracing output;
        ``1`` will generate normal tracing output; and ``2`` or
        higher will generate verbose tracing output.  This value
        overrides the trace level value that was given to the
        constructor.
    :return: the chunked output.
    :rtype: Tree
    """
    effective_trace = trace if trace is not None else self._trace
    result = chunk_struct
    for _pass in range(self._loop):
        for stage in self._stages:
            # Each stage receives the output of the previous one.
            result = stage.parse(result, trace=effective_trace)
    return result

Example #7

Source File: tree.py From props with MIT License

6 votes

def find_tree_matches(tree,pat):
    """
    Collect the matches of a pattern at a node and at all of its descendants.

    The node itself is tried first, followed by each child subtree in
    order; only truthy results of ``tree.match`` are kept.

    @type  tree: DepTree
    @param tree: tree in which to search for matches

    @type  pat: nltk.Tree
    @param pat: a pattern to match against tree

    @rtype:  list [unification of pat]
    @return: all possible unification of pat in tree
    """
    own_match = tree.match(pat)
    found = [own_match] if own_match else []
    for subtree in tree.children:
        found += find_tree_matches(subtree, pat)
    return found

Example #8

Source File: matcher.py From Lango with GNU General Public License v2.0

6 votes

def match_template(tree, template, args=None):
    """Test whether a template string matches a Tree structure

    Args:
        tree (Tree): Parsed Tree structure of a sentence
        template (str): String template to match. Example: "( S ( NP ) )"
        args (dict): Optional mapping that receives the entries collected
            during a successful match; left untouched on failure
    Returns:
        bool: If they match or not
    """
    captured = {}
    template_tokens = get_tokens(template.split())
    if not match_tokens(tree, template_tokens, captured):
        return False

    # Publish the captured entries only once the whole template has matched.
    if args is not None:
        for name, value in captured.items():
            args[name] = value
    logger.debug('MATCHED: {0}'.format(template))
    return True

Example #9

Source File: constituency_parser.py From magnitude with MIT License

6 votes

def decode(self, output_dict):
    u"""
    Build trees from the scored spans and strip the batch padding.

    The class probabilities and spans are moved to the CPU and handed to
    ``self.construct_trees`` together with the span counts, the tokens and
    the POS tags (``None`` when any entry of ``pos_tags`` is falsy).

    For batch prediction the spans and class probabilities are padded, so
    ``spans`` and ``class_probabilities`` in ``output_dict`` are replaced
    by per-instance lists cut down to ``num_spans[i]`` entries each.  The
    constructed trees are stored under ``trees`` and the same dictionary
    is returned.
    """
    class_probs = output_dict[u'class_probabilities'].cpu().data
    spans = output_dict[u"spans"].cpu().data

    sentences = output_dict[u"tokens"]
    pos_tags = output_dict[u"pos_tags"]
    if not all(pos_tags):
        pos_tags = None
    span_counts = output_dict[u"num_spans"].data
    trees = self.construct_trees(class_probs, spans, span_counts, sentences, pos_tags)

    trimmed_spans = []
    trimmed_probs = []
    for row in range(class_probs.size(0)):
        count = span_counts[row]
        # Keep only the real (unpadded) spans of this instance.
        trimmed_spans.append(spans[row, :count])
        trimmed_probs.append(class_probs[row, :count, :])

    output_dict[u"spans"] = trimmed_spans
    output_dict[u"class_probabilities"] = trimmed_probs
    output_dict[u"trees"] = trees
    return output_dict

Example #10

Source File: matcher.py From Lango with GNU General Public License v2.0

6 votes

def match_rules_context_multi(tree, rules, parent_context=None):
    """Recursively matches a Tree structure with rules and returns all contexts

    Every template in ``rules`` is tried.  A matching template without
    child rules contributes its own context; a matching template with child
    rules contributes the combinations produced by ``cross_context`` from
    the contexts of its children.

    Args:
        tree (Tree): Parsed tree structure
        rules (dict): See match_rules
        parent_context (dict): Context of parent call.  It is copied for
            each template and never modified; defaults to an empty context
    Returns:
        list: Context dictionaries of all matches; empty if nothing matched
    """
    # A None default avoids sharing one dict object between unrelated calls.
    if parent_context is None:
        parent_context = {}

    all_contexts = []
    for template, match_rules in rules.items():
        context = parent_context.copy()
        if not match_template(tree, template, context):
            continue
        if not match_rules:
            # Leaf rule: the template match alone is a complete context.
            all_contexts.append(context)
            continue
        child_contexts = [
            match_rules_context_multi(context[key], child_rules, context)
            for key, child_rules in match_rules.items()
        ]
        all_contexts += cross_context(child_contexts)
    return all_contexts

Example #11

Source File: matcher.py From Lango with GNU General Public License v2.0

6 votes

def match_rules_context(tree, rules, parent_context=None):
    """Recursively matches a Tree structure with rules and returns context

    Templates are tried in the iteration order of ``rules``.  The first
    template that matches decides the outcome: its child rules are matched
    against the captured subtrees, and if any of them fails the whole call
    returns None without trying the remaining templates.

    Args:
        tree (Tree): Parsed tree structure
        rules (dict): See match_rules
        parent_context (dict): Context of parent call.  It is copied for
            each template and never modified; defaults to an empty context
    Returns:
        dict: Context matched dictionary of matched rules or
        None if no match
    """
    # A None default avoids sharing one dict object between unrelated calls.
    if parent_context is None:
        parent_context = {}

    for template, match_rules in rules.items():
        context = parent_context.copy()
        if not match_template(tree, template, context):
            continue
        # Empty or missing child rules mean the template is a leaf rule,
        # as in match_rules_context_multi.
        for key, child_rules in (match_rules or {}).items():
            child_context = match_rules_context(context[key], child_rules, context)
            if not child_context:
                return None
            context.update(child_context)
        return context
    return None

Example #12

Source File: regexp.py From luscan-devel with GNU General Public License v2.0

6 votes

def parse(self, chunk_struct, trace=None):
    """
    Chunk the input by applying the stage parsers one after another,
    cycling through all of them ``self._loop`` times.

    :type chunk_struct: Tree
    :param chunk_struct: the chunk structure to be (further) chunked
        (this tree is modified, and is also returned)
    :type trace: int
    :param trace: The level of tracing that should be used when
        parsing a text.  ``0`` will generate no tracing output;
        ``1`` will generate normal tracing output; and ``2`` or
        higher will generate verbose tracing output.  This value
        overrides the trace level value that was given to the
        constructor.
    :return: the chunked output.
    :rtype: Tree
    """
    if trace is None:
        trace = self._trace
    passes = range(self._loop)
    # Flattened schedule: every stage, once per pass, in order.
    for stage in (s for _ in passes for s in self._stages):
        chunk_struct = stage.parse(chunk_struct, trace=trace)
    return chunk_struct

Example #13

Source File: tree_utils.py From tetre with MIT License

6 votes

def to_nltk_tree_general(node, attr_list=("dep_", "pos_"), level=99999):
    """Transforms a Spacy dependency tree into an NLTK tree, using selected Spacy node attributes
    (joined with "/") as the NLTK node label.

    Args:
        node: The starting node from the tree in which the transformation will occur.
        attr_list: Which attributes from the Spacy nodes will be included in the NLTK node label.
        level: The maximum depth of the tree.

    Returns:
        A NLTK Tree (nltk.tree) when the node has children and the depth budget is not
        exhausted; otherwise the label string of the node.
    """
    label = "/".join([getattr(node, attr_name) for attr_name in attr_list])

    # Depth budget used up, or nothing below this node: the bare label is the result.
    if level == 0 or not (node.n_lefts + node.n_rights > 0):
        return label

    subtrees = [to_nltk_tree_general(child, attr_list, level-1) for child in node.children]
    return Tree(label, subtrees)

Example #14

Source File: tree_utils.py From tetre with MIT License

6 votes

def get_node_representation(tetre_format, token):
    """Given a format and a SpaCy node (spacy.token), returns a NLTK tree (nltk.tree) representing the node.
    The tree is labelled with the token's part of speech and has one entry per direct child of the token.

    Args:
        tetre_format: Comma-separated attribute names used for the string representation of the children.
        token: The SpaCy node itself (spacy.token).

    Returns:
        A NLTK Tree (nltk.tree)
    """
    attr_names = tetre_format.split(",")
    label = token.pos_

    children = []
    if token.n_lefts + token.n_rights > 0:
        # level=0 makes to_nltk_tree_general return each child as a flat label.
        children = [to_nltk_tree_general(child, attr_list=attr_names, level=0) for child in token.children]

    return Tree(label, children)

Example #15

Source File: tree_utils.py From tetre with MIT License

6 votes

def nltk_tree_to_qtree(tree):
    """Transforms a NLTK Tree in a QTREE. A QTREE is a string representation of a tree.

    For details, please see: http://www.ling.upenn.edu/advice/latex/qtree/qtreenotes.pdf

    Every node is wrapped in " [ " ... " ] ". The children of a tree are emitted in sorted
    order, not in their original order.

    Args:
        tree: The NLTK Tree (nltk.tree), or a leaf value.

    Returns:
        A string with the QTREE representation of the NLTK Tree (nltk.tree).
    """
    parts = [" [ "]

    if not isinstance(tree, Tree):
        parts.append(" " + str(tree) + " ")
    else:
        parts.append(" " + tree.label() + " ")
        if len(tree) > 0:
            parts.append(" ".join([nltk_tree_to_qtree(child) for child in sorted(tree)]))

    parts.append(" ] ")
    return "".join(parts)

Example #16

Source File: hobbs.py From hobbs with MIT License

6 votes

def traverse_tree(tree, pro):
    """ Traverse a tree in a left-to-right, breadth-first manner,
    proposing any NP encountered as an antecedent. Returns the
    tree and the position of the first possible antecedent.

    Args:
        tree: the tree being searched
        pro: the pronoun being resolved (string)
    Returns:
        (tree, position) for the first node whose label contains "NP"
        and which ``match`` accepts, or (None, None) if there is none
    """
    # A deque is the plain FIFO for a single-threaded search; the
    # synchronised Queue class (module ``Queue`` exists on Python 2 only)
    # is not needed here.
    from collections import deque

    queue = deque([tree])
    while queue:
        node = queue.popleft()
        # if the node is an NP, return it as a potential antecedent
        if "NP" in node.label():
            pos = get_pos(tree, node)  # computed once, reused for the result
            if match(tree, pos, pro):
                return tree, pos
        # only subtrees are enqueued; leaves cannot be antecedents
        queue.extend(child for child in node if isinstance(child, nltk.Tree))
    # if no antecedent is found, return None
    return None, None

Example #17

Source File: hobbs.py From hobbs with MIT License

6 votes

def bft(tree):
    """ Perform a breadth-first traversal of a tree.
    Return the nodes in a list in level-order.

    Only ``nltk.Tree`` children are visited; leaves are not included.

    Args:
        tree: a tree node
    Returns:
        lst: a list of tree nodes in left-to-right level-order
    """
    # A deque is the plain FIFO for a single-threaded traversal; the
    # synchronised Queue class (module ``Queue`` exists on Python 2 only)
    # is not needed here.
    from collections import deque

    lst = []
    pending = deque([tree])
    while pending:
        node = pending.popleft()
        lst.append(node)
        pending.extend(child for child in node if isinstance(child, nltk.Tree))
    return lst

Example #18

Source File: domain_language.py From allennlp-semparse with Apache License 2.0

6 votes

def nltk_tree_to_logical_form(tree: Tree) -> str:
    """
    Turn an ``nltk.Tree`` that represents the syntax tree of a logical form into the
    (lisp-like) logical form string itself.

    A node without children yields its own label, a node with exactly one child yields
    that child's label, and any other node yields the space-separated forms of its
    children wrapped in parentheses.

    This is used in the logic that converts action sequences back into logical forms.  It's very
    unlikely that you will need this anywhere else.
    """
    # nltk.Tree inherits from `list`, so `len()` is the number of children.  The length is
    # checked explicitly instead of relying on the truthiness of the tree.
    child_count = len(tree)
    if child_count > 1:
        rendered = [nltk_tree_to_logical_form(child) for child in tree]
        return "(" + " ".join(rendered) + ")"
    labelled_node = tree if child_count == 0 else tree[0]
    return labelled_node.label()

Example #19

Source File: data_nli.py From PRPN-Analysis with MIT License

6 votes

def add_words(self, file_name):
    """Add the words of both parsed sentences on every line to the dictionary.

    Each non-blank line of the file is evaluated to a mapping that must
    provide the bracketed parses ``sentence1_parse`` and
    ``sentence2_parse``.  The words kept by ``self.filter_words``, framed
    by ``<s>`` and ``</s>``, are passed to ``self.dictionary.add_word``.

    Args:
        file_name: path of the file to read
    """
    # The context manager closes the file even if a line fails to parse.
    with open(file_name, 'r') as f_in:
        for line in f_in:
            if line.strip() == '':
                continue
            # SECURITY NOTE(review): eval() executes whatever the line
            # contains, so this must only be run on trusted files.  It is
            # kept so that the accepted line format does not change;
            # consider json.loads / ast.literal_eval if the data allows it.
            data = eval(line)
            for parse_key in ('sentence1_parse', 'sentence2_parse'):
                sen_tree = Tree.fromstring(data[parse_key])
                words = ['<s>'] + self.filter_words(sen_tree) + ['</s>']
                for word in words:
                    self.dictionary.add_word(word)

Example #20

Source File: tree.py From props with MIT License

5 votes

def _VERBAL_PREDICATE_SUBTREE_Adv(self):
        """
        Collect the adverb children of this node into ``self.adverb_children``.

        The children are split by ``double_filter(adverb_child_func, ...)``.
        Each adverb child is then handled in one of three ways:

          - if ``pat`` matches somewhere in its subtree, the pair
            ``(ccomp, [(id, word) for advmod, top, mark])`` is recorded and
            the ``mark`` and ``advmod`` nodes are removed from their parents
            (this modifies the matched nodes in place);
          - otherwise, if its relation is not ``'advcl'``, the pair
            ``(child, False)`` is recorded;
          - otherwise it is added to the non-adverb children.

        As a side effect ``self.adverb_subj`` is set to a shallow copy of
        this node taken while ``self.children`` holds only the non-adverb
        children; ``self.children`` is restored afterwards.

        @rtype:  list
        @return: self.adverb_children
        """
        pat = Tree("self.parent_relation=='advmod'",
               [Tree("(self.parent_relation=='advmod') and (len(self.children)==0)",[]),
                Tree("$+(self.parent_relation=='ccomp')",
                     [Tree("(self.parent_relation=='mark') and (len(self.children)==0)",[])])])
        # pat identifies instances such as "as much as"
        # Keeps a reference (not a copy) to the current children list so it
        # can be reinstated at the end.
        children_copy = self.children
        adverb_children,non_adverb_children = double_filter(adverb_child_func,self.children)
        self.adverb_children = []
        for curChild in adverb_children:
            ls = find_tree_matches(tree=curChild,pat=pat)
            if ls:
                # Only the first match is used.  The indexing presumably
                # mirrors the nesting of pat - confirm against match().
                top = ls[0][0]
                advmod = ls[0][1][0]
                ccomp = ls[0][2][0]
                mark = ls[0][2][1][0]
                self.adverb_children.append((ccomp,
                                             [(t.id,t.word) for t in [advmod,top,mark]]))
                # Detach the matched mark and advmod nodes from their parents.
                ccomp.children.remove(mark)
                top.children.remove(advmod)
                
            elif curChild.parent_relation != 'advcl':
                self.adverb_children.append((curChild,False))
            else:
                # Unmatched 'advcl' children are treated as non-adverb children.
                non_adverb_children.append(curChild)
        
            
        # Temporarily expose only the non-adverb children so that the shallow
        # copy stored in adverb_subj refers to that list, then restore.
        self.children = non_adverb_children
        self.adverb_subj = copy.copy(self)
        self.children = children_copy
        return self.adverb_children
    
    # if this predicate is a clausal complement of its ancestor and it has a TO child, it is defined as an infinitive

Example #21

Source File: regexp.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International

5 votes

def __init__(self, chunk_struct, debug_level=1):
    """
    Build a new ``ChunkString`` encoding the chunking of the given
    chunk structure.

    The root label and a slice copy of the top-level pieces are stored,
    and the string form is built as the tags of all pieces, each wrapped
    in angle brackets.

    :type chunk_struct: Tree
    :param chunk_struct: The chunk structure to be further chunked.
    :type debug_level: int
    :param debug_level: The level of debugging which should be
        applied to transformations on the ``ChunkString``.  The
        valid levels are:
            - 0: no checks
            - 1: full check on to_chunkstruct
            - 2: full check on to_chunkstruct and cursory check after
               each transformation.
            - 3: full check on to_chunkstruct and full check after
               each transformation.
        We recommend you use at least level 1.  You should
        probably use level 3 if you use any non-standard
        subclasses of ``RegexpChunkRule``.
    """
    self._root_label = chunk_struct.label()
    self._pieces = chunk_struct[:]
    joined_tags = '><'.join([self._tag(piece) for piece in self._pieces])
    self._str = '<{}>'.format(joined_tags)
    self._debug = debug_level

Example #22

Source File: domain_language.py From allennlp-semparse with Apache License 2.0

5 votes

def action_sequence_to_logical_form(self, action_sequence: List[str]) -> str:
    """
    Rebuild the logical form from an action sequence as produced by
    :func:`logical_form_to_action_sequence`, i.e. from a linearization of an abstract
    syntax tree.

    A ``ParsingError`` raised while building the tree is logged and re-raised; a
    ``ParsingError`` is also raised when actions are left over afterwards.
    """
    # Basic outline: we assume that the bracketing that we get in the RHS of each action is the
    # correct bracketing for reconstructing the logical form.  This is true when there is no
    # currying in the action sequence.  Given this assumption, we just need to construct a tree
    # from the action sequence, then output all of the leaves in the tree, with brackets around
    # the children of all non-terminal nodes.

    split_actions = [action.split(" -> ") for action in action_sequence]
    # The right-hand side of the first action labels the root of the tree.
    root_label = split_actions[0][1]
    tree = Tree(root_label, [])

    try:
        leftover = self._construct_node_from_actions(tree, split_actions[1:])
    except ParsingError:
        logger.error("Error parsing action sequence: %s", action_sequence)
        raise

    if not leftover:
        return nltk_tree_to_logical_form(tree)

    logger.error("Error parsing action sequence: %s", action_sequence)
    logger.error("Remaining actions were: %s", leftover)
    raise ParsingError("Extra actions in action sequence")

Example #23

Source File: semantic_types.py From ccg2lambda with Apache License 2.0

5 votes

def remove_labels_and_unaries(tree):
    """Blank the labels of inner nodes, collapse unary nodes and return the tree as a string.

    The tree is modified in place.  The root (position ``()``) keeps its
    label and is never collapsed; leaves are left untouched.

    Args:
        tree (Tree): the tree to simplify; anything else fails the assertion.
    Returns:
        str: ``str(tree)`` after the modifications.
    """
    assert isinstance(tree, Tree)
    leaf_treepos = tree.treepositions(order='leaves')
    # NOTE(review): both position lists are computed before the loop starts
    # changing the tree - confirm that collapsing a unary node cannot leave
    # a later position stale.
    for p in tree.treepositions():
        if p not in leaf_treepos and p != ():
            tree[p].set_label('')
            # A node with a single child is replaced by that child.
            if len(tree[p]) == 1:
                tree[p] = tree[p][0]
    return str(tree)

Example #24

Source File: hobbs.py From hobbs with MIT License

5 votes

def count_np_nodes(tree):
    """ Count the NP nodes in a tree.

    A node counts when its label contains "NP" and is not one of
    ``nominal_labels``.  Anything that is not an ``nltk.Tree`` counts
    as zero.
    """
    if not isinstance(tree, nltk.Tree):
        return 0
    label = tree.label()
    own = 1 if ("NP" in label and label not in nominal_labels) else 0
    return own + sum(count_np_nodes(child) for child in tree)

Example #25

Source File: test_phrase_grammar.py From Ordered-Neurons with BSD 3-Clause "New" or "Revised" License

5 votes

def get_brackets(tree, idx=0):
    """Collect the (start, end) spans of the multi-word constituents below ``tree``.

    ``idx`` is the position of the first leaf under ``tree``.  Returns a
    pair ``(brackets, next_idx)``: the set of spans wider than one leaf
    found among the descendants (the span of ``tree`` itself is not
    added by this call) and the position following the last leaf.
    """
    if not (isinstance(tree, list) or isinstance(tree, nltk.Tree)):
        # A leaf occupies exactly one position and has no brackets.
        return set(), idx + 1

    spans = set()
    cursor = idx
    for child in tree:
        child_spans, child_end = get_brackets(child, cursor)
        if child_end - cursor > 1:
            spans.add((cursor, child_end))
            spans.update(child_spans)
        cursor = child_end
    return spans, cursor

Example #26

Source File: test_phrase_grammar.py From Ordered-Neurons with BSD 3-Clause "New" or "Revised" License

5 votes

def MRG_labeled(tr):
    """Render a tree as a labelled bracket string.

    A subtree whose label is in ``word_tags`` is rendered as its first
    leaf followed by a space.  Any other subtree is rendered as
    ``(<label> ... ) `` where the label is cut at the first ``-`` or
    ``=``.  Anything that is not an ``nltk.Tree`` renders as the empty
    string.
    """
    if not isinstance(tr, nltk.Tree):
        return ''
    if tr.label() in word_tags:
        return tr.leaves()[0] + ' '

    category = re.split(r'[-=]', tr.label())[0]
    rendered_children = ''.join([MRG_labeled(child) for child in tr])
    return '(%s ' % category + rendered_children + ') '

Example #27

Source File: data_ptb.py From PRPN with MIT License

5 votes

def tokenize(self, file_ids):
    """Read the parsed sentences of the given files.

    Returns three parallel lists: the dictionary indices of each
    sentence as a ``torch.LongTensor``, the words of each sentence
    (framed by ``<s>`` and ``</s>``) and the nested-list form of each
    parse tree.
    """

    def tree2list(tree):
        # Nested lists of words; nodes left with one entry collapse to it.
        if not isinstance(tree, nltk.Tree):
            return []
        if tree.label() in word_tags:
            return tree.leaves()[0]
        kept = [item for item in map(tree2list, tree) if item != []]
        if not kept:
            return []
        return kept if len(kept) > 1 else kept[0]

    sens_idx, sens, trees = [], [], []
    for file_id in file_ids:
        for sen_tree in ptb.parsed_sents(file_id):
            words = ['<s>'] + self.filter_words(sen_tree) + ['</s>']
            # (A filter skipping sentences longer than 50 words is disabled.)
            sens.append(words)
            indices = [self.dictionary[word] for word in words]
            sens_idx.append(torch.LongTensor(indices))
            trees.append(tree2list(sen_tree))

    return sens_idx, sens, trees

Example #28

Source File: test_phrase_grammar.py From PRPN with MIT License

5 votes

def get_brackets(tree, idx=0):
    """Return the constituent spans below ``tree`` and the next free position.

    Leaves are numbered from ``idx``.  The result is ``(brackets,
    next_idx)`` where ``brackets`` holds a ``(start, end)`` pair for
    every descendant that covers more than one leaf, and ``next_idx`` is
    the position after the last leaf of ``tree``.
    """
    is_internal = isinstance(tree, list) or isinstance(tree, nltk.Tree)
    found = set()
    if not is_internal:
        # A single leaf: no span, advance by one position.
        return found, idx + 1

    start = idx
    for node in tree:
        inner, end = get_brackets(node, start)
        width = end - start
        if width > 1:
            found.add((start, end))
            found.update(inner)
        start = end
    return found, start

Example #29

Source File: data_ptb.py From Ordered-Neurons with BSD 3-Clause "New" or "Revised" License

5 votes

def tokenize(self, file_ids):
    """Read the parsed sentences of the given files.

    Returns four parallel lists: the dictionary indices of each
    sentence as a ``torch.LongTensor``, the words of each sentence
    (framed by ``<eos>`` on both sides), the nested-list form of each
    parse tree and the original parse trees.
    """

    def tree2list(tree):
        # Nested lists of normalised words; nodes left with one entry
        # collapse to that entry.
        if not isinstance(tree, nltk.Tree):
            return []
        if tree.label() in word_tags:
            # Lower-case the word and replace every digit run with 'N'.
            return re.sub('[0-9]+', 'N', tree.leaves()[0].lower())
        kept = [item for item in map(tree2list, tree) if item != []]
        if not kept:
            return []
        return kept if len(kept) > 1 else kept[0]

    sens_idx, sens, trees, nltk_trees = [], [], [], []
    for file_id in file_ids:
        for sen_tree in ptb.parsed_sents(file_id):
            words = ['<eos>'] + self.filter_words(sen_tree) + ['<eos>']
            # (A filter skipping sentences longer than 50 words is disabled.)
            sens.append(words)
            indices = [self.dictionary[word] for word in words]
            sens_idx.append(torch.LongTensor(indices))
            trees.append(tree2list(sen_tree))
            nltk_trees.append(sen_tree)

    return sens_idx, sens, trees, nltk_trees

Example #30

Source File: regexp.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International

5 votes

def to_chunkstruct(self, chunk_label='CHUNK'):
    """
    Build the chunk structure encoded by this ``ChunkString``.

    When the debug level is above 0 the string is verified first.

    :param chunk_label: the label given to every chunk subtree.
    :rtype: Tree
    :raise ValueError: If a transformation has generated an
        invalid chunkstring.
    """
    if self._debug > 0:
        self._verify(self._str, 1)

    pieces = []
    start = 0
    # Splitting on braces gives segments that alternate between outside
    # and inside a chunk, beginning outside.
    for position, segment in enumerate(re.split('[{}]', self._str)):
        # Each '<' in the segment stands for one stored piece.
        stop = start + segment.count('<')
        tokens = self._pieces[start:stop]

        if position % 2:
            pieces.append(Tree(chunk_label, tokens))
        else:
            pieces += tokens

        start = stop

    return Tree(self._root_label, pieces)