Python nltk.tree.Tree() Examples

The following are 30 code examples of nltk.tree.Tree(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module nltk.tree , or try the search function .
Example #1
Source File: rdparser_app.py    From luscan-devel with GNU General Public License v2.0 6 votes vote down vote up
def _backtrack(self, *e):
        if self._animating_lock: return
        if self._parser.backtrack():
            elt = self._parser.tree()
            for i in self._parser.frontier()[0]:
                elt = elt[i]
            self._lastoper1['text'] = 'Backtrack'
            self._lastoper2['text'] = ''
            if isinstance(elt, Tree):
                self._animate_backtrack(self._parser.frontier()[0])
            else:
                self._animate_match_backtrack(self._parser.frontier()[0])
            return 1
        else:
            self._autostep = 0
            self._lastoper1['text'] = 'Finished'
            self._lastoper2['text'] = ''
            return 0 
Example #2
Source File: toolbox.py    From razzy-spinner with GNU General Public License v3.0 6 votes vote down vote up
def _chunk_parse(self, grammar=None, root_label='record', trace=0, **kwargs):
        """
        Returns an element tree structure corresponding to a toolbox data file
        parsed according to the chunk grammar.

        :type grammar: str
        :param grammar: Contains the chunking rules used to parse the
            database.  See ``chunk.RegExp`` for documentation.
        :type root_label: str
        :param root_label: The node value that should be used for the
            top node of the chunk structure.
        :type trace: int
        :param trace: The level of tracing that should be used when
            parsing a text.  ``0`` will generate no tracing output;
            ``1`` will generate normal tracing output; and ``2`` or
            higher will generate verbose tracing output.
        :type kwargs: dict
        :param kwargs: Keyword arguments passed to ``toolbox.StandardFormat.fields()``
        :rtype: ElementTree._ElementInterface
        """
        from nltk import chunk
        from nltk.tree import Tree

        cp = chunk.RegexpParser(grammar, root_label=root_label, trace=trace)
        db = self.parse(**kwargs)
        tb_etree = Element('toolbox_data')
        header = db.find('header')
        tb_etree.append(header)
        for record in db.findall('record'):
            parsed = cp.parse([(elem.text, elem.tag) for elem in record])
            tb_etree.append(self._tree2etree(parsed))
        return tb_etree 
Example #3
Source File: regexp.py    From razzy-spinner with GNU General Public License v3.0 6 votes vote down vote up
def parse(self, chunk_struct, trace=None):
        """
        Apply the chunk parser to this input.

        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
            (this tree is modified, and is also returned)
        :type trace: int
        :param trace: The level of tracing that should be used when
            parsing a text.  ``0`` will generate no tracing output;
            ``1`` will generate normal tracing output; and ``2`` or
            highter will generate verbose tracing output.  This value
            overrides the trace level value that was given to the
            constructor.
        :return: the chunked output.
        :rtype: Tree
        """
        if trace is None: trace = self._trace
        for i in range(self._loop):
            for parser in self._stages:
                chunk_struct = parser.parse(chunk_struct, trace=trace)
        return chunk_struct 
Example #4
Source File: util.py    From razzy-spinner with GNU General Public License v3.0 6 votes vote down vote up
def tree2conlltags(t):
    """
    Return a list of 3-tuples containing ``(word, tag, IOB-tag)``.
    Convert a tree to the CoNLL IOB tag format.

    :param t: The tree to be converted.
    :type t: Tree
    :rtype: list(tuple)
    """

    tags = []
    for child in t:
        try:
            category = child.label()
            prefix = "B-"
            for contents in child:
                if isinstance(contents, Tree):
                    raise ValueError("Tree is too deeply nested to be printed in CoNLL format")
                tags.append((contents[0], contents[1], prefix+category))
                prefix = "I-"
        except AttributeError:
            tags.append((child[0], child[1], "O"))
    return tags 
Example #5
Source File: named_entity.py    From razzy-spinner with GNU General Public License v3.0 6 votes vote down vote up
def _tagged_to_parse(self, tagged_tokens):
        """
        Convert a list of tagged tokens to a chunk-parse tree.
        """
        sent = Tree('S', [])

        for (tok,tag) in tagged_tokens:
            if tag == 'O':
                sent.append(tok)
            elif tag.startswith('B-'):
                sent.append(Tree(tag[2:], [tok]))
            elif tag.startswith('I-'):
                if (sent and isinstance(sent[-1], Tree) and
                    sent[-1].label() == tag[2:]):
                    sent[-1].append(tok)
                else:
                    sent.append(Tree(tag[2:], [tok]))
        return sent 
Example #6
Source File: named_entity.py    From razzy-spinner with GNU General Public License v3.0 6 votes vote down vote up
def _parse_to_tagged(sent):
        """
        Convert a chunk-parse tree to a list of tagged tokens.
        """
        toks = []
        for child in sent:
            if isinstance(child, Tree):
                if len(child) == 0:
                    print("Warning -- empty chunk in sentence")
                    continue
                toks.append((child[0], 'B-%s' % child.label()))
                for tok in child[1:]:
                    toks.append((tok, 'I-%s' % child.label()))
            else:
                toks.append((child, 'O'))
        return toks 
Example #7
Source File: chart.py    From razzy-spinner with GNU General Public License v3.0 6 votes vote down vote up
def trees(self, edge, tree_class=Tree, complete=False):
        """
        Return an iterator of the tree structures that are associated
        with ``edge``.

        If ``edge`` is incomplete, then the unexpanded children will be
        encoded as childless subtrees, whose node value is the
        corresponding terminal or nonterminal.

        :rtype: list(Tree)
        :note: If two trees share a common subtree, then the same
            Tree may be used to encode that subtree in
            both trees.  If you need to eliminate this subtree
            sharing, then create a deep copy of each tree.
        """
        return iter(self._trees(edge, complete, memo={}, tree_class=tree_class)) 
Example #8
Source File: shiftreduce.py    From razzy-spinner with GNU General Public License v3.0 6 votes vote down vote up
def _shift(self, stack, remaining_text):
        """
        Move a token from the beginning of ``remaining_text`` to the
        end of ``stack``.

        :type stack: list(str and Tree)
        :param stack: A list of strings and Trees, encoding
            the structure of the text that has been parsed so far.
        :type remaining_text: list(str)
        :param remaining_text: The portion of the text that is not yet
            covered by ``stack``.
        :rtype: None
        """
        stack.append(remaining_text[0])
        remaining_text.remove(remaining_text[0])
        if self._trace: self._trace_shift(stack, remaining_text) 
Example #9
Source File: shiftreduce.py    From razzy-spinner with GNU General Public License v3.0 6 votes vote down vote up
def _trace_stack(self, stack, remaining_text, marker=' '):
        """
        Print trace output displaying the given stack and text.

        :rtype: None
        :param marker: A character that is printed to the left of the
            stack.  This is used with trace level 2 to print 'S'
            before shifted stacks and 'R' before reduced stacks.
        """
        s = '  '+marker+' [ '
        for elt in stack:
            if isinstance(elt, Tree):
                s += unicode_repr(Nonterminal(elt.label())) + ' '
            else:
                s += unicode_repr(elt) + ' '
        s += '* ' + ' '.join(remaining_text) + ']'
        print(s) 
Example #10
Source File: recursivedescent.py    From razzy-spinner with GNU General Public License v3.0 6 votes vote down vote up
def _production_to_tree(self, production):
        """
        :rtype: Tree
        :return: The Tree that is licensed by ``production``.
            In particular, given the production ``[lhs -> elt[1] ... elt[n]]``
            return a tree that has a node ``lhs.symbol``, and
            ``n`` children.  For each nonterminal element
            ``elt[i]`` in the production, the tree token has a
            childless subtree with node value ``elt[i].symbol``; and
            for each terminal element ``elt[j]``, the tree token has
            a leaf token with type ``elt[j]``.

        :param production: The CFG production that licenses the tree
            token that should be returned.
        :type production: Production
        """
        children = []
        for elt in production.rhs():
            if isinstance(elt, Nonterminal):
                children.append(Tree(elt.symbol(), []))
            else:
                # This will be matched.
                children.append(elt)
        return Tree(production.lhs().symbol(), children) 
Example #11
Source File: rdparser_app.py    From razzy-spinner with GNU General Public License v3.0 6 votes vote down vote up
def _backtrack(self, *e):
        if self._animating_lock: return
        if self._parser.backtrack():
            elt = self._parser.tree()
            for i in self._parser.frontier()[0]:
                elt = elt[i]
            self._lastoper1['text'] = 'Backtrack'
            self._lastoper2['text'] = ''
            if isinstance(elt, Tree):
                self._animate_backtrack(self._parser.frontier()[0])
            else:
                self._animate_match_backtrack(self._parser.frontier()[0])
            return True
        else:
            self._autostep = 0
            self._lastoper1['text'] = 'Finished'
            self._lastoper2['text'] = ''
            return False 
Example #12
Source File: parser.py    From atap with Apache License 2.0 6 votes vote down vote up
def nltk_spacy_tree(sent):
    """
    Visually inspect the SpaCy dependency tree with nltk.tree
    :param sent: string
    :return: None
    """
    doc = spacy_nlp(sent)
    def token_format(token):
        return "_".join([token.orth_, token.tag_, token.dep_])

    def to_nltk_tree(node):
        if node.n_lefts + node.n_rights > 0:
            return Tree(token_format(node),[to_nltk_tree(child) for child in node.children])
        else:
            return token_format(node)

    tree = [to_nltk_tree(sent.root) for sent in doc.sents]
    tree[0].draw() 
Example #13
Source File: tree.py    From tree2labels with MIT License 6 votes vote down vote up
def path_to_leaves(self, current_path, paths):

        for i, child in enumerate(self):
            
            pathi = []
            if isinstance(child,Tree):
                common_path = copy.deepcopy(current_path)
                
                common_path.append(child.label()+"-"+str(i))
                child.path_to_leaves(common_path, paths)
            else:
                for element in current_path:
                    pathi.append(element)
                pathi.append(child)
                paths.append(pathi)
    
        return paths 
Example #14
Source File: relextract.py    From razzy-spinner with GNU General Public License v3.0 6 votes vote down vote up
def ieer_headlines():

    from nltk.corpus import ieer
    from nltk.tree import Tree
    
    print("IEER: First 20 Headlines")
    print("=" * 45)  
    
    trees = [(doc.docno, doc.headline) for file in ieer.fileids() for doc in ieer.parsed_docs(file)]
    for tree in trees[:20]:
        print()
        print("%s:\n%s" % tree)



#############################################
## Dutch CONLL2002: take_on_role(PER, ORG
############################################# 
Example #15
Source File: conll.py    From razzy-spinner with GNU General Public License v3.0 6 votes vote down vote up
def _tree2conll(self, tree, wordnum, words, pos, synt):
        assert isinstance(tree, Tree)
        if len(tree) == 1 and isinstance(tree[0], compat.string_types):
            pos[wordnum] = tree.label()
            assert words[wordnum] == tree[0]
            return wordnum+1
        elif len(tree) == 1 and isinstance(tree[0], tuple):
            assert len(tree[0]) == 2
            pos[wordnum], pos[wordnum] = tree[0]
            return wordnum+1
        else:
            synt[wordnum] = '(%s%s' % (tree.label(), synt[wordnum])
            for child in tree:
                wordnum = self._tree2conll(child, wordnum, words,
                                                  pos, synt)
            synt[wordnum-1] += ')'
            return wordnum 
Example #16
Source File: recursivedescent.py    From razzy-spinner with GNU General Public License v3.0 6 votes vote down vote up
def initialize(self, tokens):
        """
        Start parsing a given text.  This sets the parser's tree to
        the start symbol, its frontier to the root node, and its
        remaining text to ``token['SUBTOKENS']``.
        """

        self._rtext = tokens
        start = self._grammar.start().symbol()
        self._tree = Tree(start, [])
        self._frontier = [()]
        self._tried_e = {}
        self._tried_m = {}
        self._history = []
        self._parses = []
        if self._trace:
            self._trace_start(self._tree, self._frontier, self._rtext) 
Example #17
Source File: recursivedescent.py    From razzy-spinner with GNU General Public License v3.0 6 votes vote down vote up
def _trace_fringe(self, tree, treeloc=None):
        """
        Print trace output displaying the fringe of ``tree``.  The
        fringe of ``tree`` consists of all of its leaves and all of
        its childless subtrees.

        :rtype: None
        """

        if treeloc == (): print("*", end=' ')
        if isinstance(tree, Tree):
            if len(tree) == 0:
                print(unicode_repr(Nonterminal(tree.label())), end=' ')
            for i in range(len(tree)):
                if treeloc is not None and i == treeloc[0]:
                    self._trace_fringe(tree[i], treeloc[1:])
                else:
                    self._trace_fringe(tree[i])
        else:
            print(unicode_repr(tree), end=' ') 
Example #18
Source File: pchart.py    From razzy-spinner with GNU General Public License v3.0 6 votes vote down vote up
def _setprob(self, tree, prod_probs):
        if tree.prob() is not None: return

        # Get the prob of the CFG production.
        lhs = Nonterminal(tree.label())
        rhs = []
        for child in tree:
            if isinstance(child, Tree):
                rhs.append(Nonterminal(child.label()))
            else:
                rhs.append(child)
        prob = prod_probs[lhs, tuple(rhs)]

        # Get the probs of children.
        for child in tree:
            if isinstance(child, Tree):
                self._setprob(child, prod_probs)
                prob *= child.prob()

        tree.set_prob(prob) 
Example #19
Source File: conll.py    From razzy-spinner with GNU General Public License v3.0 5 votes vote down vote up
def _get_chunked_words(self, grid, chunk_types, tagset=None):
        # n.b.: this method is very similar to conllstr2tree.
        words = self._get_column(grid, self._colmap['words'])
        pos_tags = self._get_column(grid, self._colmap['pos'])
        if tagset and tagset != self._tagset:
            pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags]
        chunk_tags = self._get_column(grid, self._colmap['chunk'])

        stack = [Tree(self._root_label, [])]

        for (word, pos_tag, chunk_tag) in zip(words, pos_tags, chunk_tags):
            if chunk_tag == 'O':
                state, chunk_type = 'O', ''
            else:
                (state, chunk_type) = chunk_tag.split('-')
            # If it's a chunk we don't care about, treat it as O.
            if chunk_types is not None and chunk_type not in chunk_types:
                state = 'O'
            # Treat a mismatching I like a B.
            if state == 'I' and chunk_type != stack[-1].label():
                state = 'B'
            # For B or I: close any open chunks
            if state in 'BO' and len(stack) == 2:
                stack.pop()
            # For B: start a new chunk.
            if state == 'B':
                new_chunk = Tree(chunk_type, [])
                stack[-1].append(new_chunk)
                stack.append(new_chunk)
            # Add the word token.
            stack[-1].append((word, pos_tag))

        return stack[0] 
Example #20
Source File: propbank.py    From razzy-spinner with GNU General Public License v3.0 5 votes vote down vote up
def treepos(self, tree):
        """
        Convert this pointer to a standard 'tree position' pointer,
        given that it points to the given tree.
        """
        if tree is None: raise ValueError('Parse tree not avaialable')
        stack = [tree]
        treepos = []

        wordnum = 0
        while True:
            #print treepos
            #print stack[-1]
            # tree node:
            if isinstance(stack[-1], Tree):
                # Select the next child.
                if len(treepos) < len(stack):
                    treepos.append(0)
                else:
                    treepos[-1] += 1
                # Update the stack.
                if treepos[-1] < len(stack[-1]):
                    stack.append(stack[-1][treepos[-1]])
                else:
                    # End of node's child list: pop up a level.
                    stack.pop()
                    treepos.pop()
            # word node:
            else:
                if wordnum == self.wordnum:
                    return tuple(treepos[:len(treepos)-self.height-1])
                else:
                    wordnum += 1
                    stack.pop() 
Example #21
Source File: propbank.py    From razzy-spinner with GNU General Public License v3.0 5 votes vote down vote up
def select(self, tree):
        if tree is None: raise ValueError('Parse tree not avaialable')
        return Tree('*SPLIT*', [p.select(tree) for p in self.pieces]) 
Example #22
Source File: timit.py    From razzy-spinner with GNU General Public License v3.0 5 votes vote down vote up
def phone_trees(self, utterances=None):
        if utterances is None: utterances = self._utterances
        if isinstance(utterances, compat.string_types): utterances = [utterances]

        trees = []
        for utterance in utterances:
            word_times = self.word_times(utterance)
            phone_times = self.phone_times(utterance)
            sent_times = self.sent_times(utterance)

            while sent_times:
                (sent, sent_start, sent_end) = sent_times.pop(0)
                trees.append(Tree('S', []))
                while (word_times and phone_times and
                       phone_times[0][2] <= word_times[0][1]):
                    trees[-1].append(phone_times.pop(0)[0])
                while word_times and word_times[0][2] <= sent_end:
                    (word, word_start, word_end) = word_times.pop(0)
                    trees[-1].append(Tree(word, []))
                    while phone_times and phone_times[0][2] <= word_end:
                        trees[-1][-1].append(phone_times.pop(0)[0])
                while phone_times and phone_times[0][2] <= sent_end:
                    trees[-1].append(phone_times.pop(0)[0])
        return trees

    # [xx] NOTE: This is currently broken -- we're assuming that the
    # fileids are WAV fileids (aka RIFF), but they're actually NIST SPHERE
    # fileids. 
Example #23
Source File: toolbox.py    From razzy-spinner with GNU General Public License v3.0 5 votes vote down vote up
def _tree2etree(self, parent):
        from nltk.tree import Tree

        root = Element(parent.label())
        for child in parent:
            if isinstance(child, Tree):
                root.append(self._tree2etree(child))
            else:
                text, tag = child
                e = SubElement(root, tag)
                e.text = text
        return root 
Example #24
Source File: conll.py    From razzy-spinner with GNU General Public License v3.0 5 votes vote down vote up
def pprint(self, include_tree=False):
        # Sanity check: trees should be the same
        for inst in self:
            if inst.tree != self.tree:
                raise ValueError('Tree mismatch!')

        # If desired, add trees:
        if include_tree:
            words = self.tree.leaves()
            pos = [None] * len(words)
            synt = ['*'] * len(words)
            self._tree2conll(self.tree, 0, words, pos, synt)

        s = ''
        for i in range(len(words)):
            # optional tree columns
            if include_tree:
                s += '%-20s ' % words[i]
                s += '%-8s ' % pos[i]
                s += '%15s*%-8s ' % tuple(synt[i].split('*'))

            # verb head column
            for inst in self:
                if i == inst.verb_head:
                    s += '%-20s ' % inst.verb_stem
                    break
            else:
                s += '%-20s ' % '-'
            # Remaining columns: self
            for inst in self:
                argstr = '*'
                for (start, end), argid in inst.tagged_spans:
                    if i==start: argstr = '(%s%s' % (argid, argstr)
                    if i==(end-1): argstr += ')'
                s += '%-12s ' % argstr
            s += '\n'
        return s 
Example #25
Source File: nombank.py    From razzy-spinner with GNU General Public License v3.0 5 votes vote down vote up
def select(self, tree):
        if tree is None: raise ValueError('Parse tree not avaialable')
        return Tree('*SPLIT*', [p.select(tree) for p in self.pieces]) 
Example #26
Source File: util.py    From razzy-spinner with GNU General Public License v3.0 5 votes vote down vote up
def conlltags2tree(sentence, chunk_types=('NP','PP','VP'),
                   root_label='S', strict=False):
    """
    Convert the CoNLL IOB format to a tree.
    """
    tree = Tree(root_label, [])
    for (word, postag, chunktag) in sentence:
        if chunktag is None:
            if strict:
                raise ValueError("Bad conll tag sequence")
            else:
                # Treat as O
                tree.append((word,postag))
        elif chunktag.startswith('B-'):
            tree.append(Tree(chunktag[2:], [(word,postag)]))
        elif chunktag.startswith('I-'):
            if (len(tree)==0 or not isinstance(tree[-1], Tree) or
                tree[-1].label() != chunktag[2:]):
                if strict:
                    raise ValueError("Bad conll tag sequence")
                else:
                    # Treat as B-*
                    tree.append(Tree(chunktag[2:], [(word,postag)]))
            else:
                tree[-1].append((word,postag))
        elif chunktag == 'O':
            tree.append((word,postag))
        else:
            raise ValueError("Bad conll tag %r" % chunktag)
    return tree 
Example #27
Source File: recursivedescent.py    From razzy-spinner with GNU General Public License v3.0 5 votes vote down vote up
def parses(self):
        """
        :return: An iterator of the parses that have been found by this
            parser so far.
        :rtype: list of Tree
        """
        return iter(self._parses) 
Example #28
Source File: nombank.py    From razzy-spinner with GNU General Public License v3.0 5 votes vote down vote up
def select(self, tree):
        if tree is None: raise ValueError('Parse tree not avaialable')
        return Tree('*CHAIN*', [p.select(tree) for p in self.pieces]) 
Example #29
Source File: conll.py    From razzy-spinner with GNU General Public License v3.0 5 votes vote down vote up
def _get_parsed_sent(self, grid, pos_in_tree, tagset=None):
        words = self._get_column(grid, self._colmap['words'])
        pos_tags = self._get_column(grid, self._colmap['pos'])
        if tagset and tagset != self._tagset:
            pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags]
        parse_tags = self._get_column(grid, self._colmap['tree'])

        treestr = ''
        for (word, pos_tag, parse_tag) in zip(words, pos_tags, parse_tags):
            if word == '(': word = '-LRB-'
            if word == ')': word = '-RRB-'
            if pos_tag == '(': pos_tag = '-LRB-'
            if pos_tag == ')': pos_tag = '-RRB-'
            (left, right) = parse_tag.split('*')
            right = right.count(')')*')' # only keep ')'.
            treestr += '%s (%s %s) %s' % (left, pos_tag, word, right)
        try:
            tree = self._tree_class.parse(treestr)
        except (ValueError, IndexError):
            tree = self._tree_class.parse('(%s %s)' %
                                          (self._root_label, treestr))

        if not pos_in_tree:
            for subtree in tree.subtrees():
                for i, child in enumerate(subtree):
                    if (isinstance(child, Tree) and len(child)==1 and
                        isinstance(child[0], compat.string_types)):
                        subtree[i] = (child[0], child.label())

        return tree 
Example #30
Source File: nombank.py    From razzy-spinner with GNU General Public License v3.0 5 votes vote down vote up
def treepos(self, tree):
        """
        Convert this pointer to a standard 'tree position' pointer,
        given that it points to the given tree.
        """
        if tree is None: raise ValueError('Parse tree not avaialable')
        stack = [tree]
        treepos = []

        wordnum = 0
        while True:
            #print treepos
            #print stack[-1]
            # tree node:
            if isinstance(stack[-1], Tree):
                # Select the next child.
                if len(treepos) < len(stack):
                    treepos.append(0)
                else:
                    treepos[-1] += 1
                # Update the stack.
                if treepos[-1] < len(stack[-1]):
                    stack.append(stack[-1][treepos[-1]])
                else:
                    # End of node's child list: pop up a level.
                    stack.pop()
                    treepos.pop()
            # word node:
            else:
                if wordnum == self.wordnum:
                    return tuple(treepos[:len(treepos)-self.height-1])
                else:
                    wordnum += 1
                    stack.pop()