Python nltk.tree.Tree.fromstring() Examples

The following are 30 code examples of nltk.tree.Tree.fromstring(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module nltk.tree, or try the search function.

Example #1

Source File: discourse_parsing.py From discourse-parsing with MIT License

6 votes

def initialize_edu_data(edus):
        '''
        Build the initial record for every EDU in the input sequence.

        Each EDU is a sequence of items whose element 0 is the word and whose
        element 1 is its POS tag.  One dictionary is produced per EDU; its
        head/start/end indices all equal the EDU's position in ``edus`` (the
        counter used for distance features), its words are lowercased, and
        its tree is a ``(text)`` node holding the EDU index as a string.
        '''

        def make_leaf_tree(position):
            # A bare "(text)" node with the EDU index appended as its only child.
            leaf = Tree.fromstring('(text)')
            leaf.append('{}'.format(position))
            return leaf

        return [
            {"head_idx": position,
             "start_idx": position,
             "end_idx": position,
             "nt": "text",
             "head": [token[0].lower() for token in edu],
             "hpos": [token[1] for token in edu],
             "tree": make_leaf_tree(position)}
            for position, edu in enumerate(edus)
        ]

Example #2

Source File: test_oracle.py From pytorch-rnng with MIT License

6 votes

def test_from_tree(self):
        """A DiscOracle built from a small tree exposes the expected actions, POS tags and words."""
        bracketed = '(S (NP (NNP John)) (VP (VBZ loves) (NP (NNP Mary))))'
        gold_actions = [
            NT('S'),
            NT('NP'), SHIFT, REDUCE,
            NT('VP'), SHIFT,
            NT('NP'), SHIFT, REDUCE,
            REDUCE,
            REDUCE,
        ]
        gold_pos_tags = ['NNP', 'VBZ', 'NNP']
        gold_words = ['John', 'loves', 'Mary']

        tree = Tree.fromstring(bracketed)
        oracle = DiscOracle.from_tree(tree)

        assert isinstance(oracle, DiscOracle)
        assert oracle.actions == gold_actions
        assert oracle.pos_tags == gold_pos_tags
        assert oracle.words == gold_words

Example #3

Source File: test_oracle.py From pytorch-rnng with MIT License

6 votes

def test_from_tree(self):
        """A GenOracle built from a small tree exposes the expected actions, words and POS tags."""
        bracketed = '(S (NP (NNP John)) (VP (VBZ loves) (NP (NNP Mary))))'
        gold_actions = [
            NT('S'),
            NT('NP'), GEN('John'), REDUCE,
            NT('VP'), GEN('loves'),
            NT('NP'), GEN('Mary'), REDUCE,
            REDUCE,
            REDUCE,
        ]
        gold_words = ['John', 'loves', 'Mary']
        gold_pos_tags = ['NNP', 'VBZ', 'NNP']

        tree = Tree.fromstring(bracketed)
        oracle = GenOracle.from_tree(tree)

        assert isinstance(oracle, GenOracle)
        assert oracle.actions == gold_actions
        assert oracle.words == gold_words
        assert oracle.pos_tags == gold_pos_tags

Example #4

Source File: stanford_sentiment_tree_bank.py From magnitude with MIT License

6 votes

def _read(self, file_path):
        """
        Yield instances built from the bracketed trees stored one per line
        in ``file_path``.

        The path is resolved through ``cached_path`` before opening.  Empty
        lines are skipped.  When ``self._use_subtrees`` is true, an instance
        is produced for every subtree returned by ``Tree.subtrees()``;
        otherwise a single instance is produced for the whole parsed line.
        Instances for which ``text_to_instance`` returns ``None`` are dropped.
        """
        with open(cached_path(file_path), u"r") as data_file:
            logger.info(u"Reading instances from lines in file at: %s", file_path)
            # Iterate the file object directly so lines are streamed instead
            # of loading the whole file into a list first.
            for line in data_file:
                line = line.strip(u"\n")
                if not line:
                    continue
                parsed_line = Tree.fromstring(line)
                # Both modes build instances the same way; only the set of
                # trees they are built from differs.
                if self._use_subtrees:
                    candidates = parsed_line.subtrees()
                else:
                    candidates = [parsed_line]
                for tree in candidates:
                    instance = self.text_to_instance(tree.leaves(), tree.label())
                    if instance is not None:
                        yield instance

    #overrides

Example #5

Source File: smt.py From PyTorch-NLP with BSD 3-Clause "New" or "Revised" License

6 votes

def parse_tree(data, subtrees=False, fine_grained=False):
    """
    Turn a bracketed tree string into text/label dictionaries.

    With ``subtrees`` false, a single dict is returned for the whole tree;
    with ``subtrees`` true, a list with one dict per subtree is returned.
    Each dict holds the space-joined leaves under 'text' and the node label
    mapped through ``get_label_str`` under 'label'.  NLTK is imported
    lazily; a hint is printed and the ImportError re-raised if it is missing.
    """
    # https://github.com/pytorch/text/blob/6476392a801f51794c90378dd23489578896c6f2/torchtext/data/example.py#L56
    try:
        from nltk.tree import Tree
    except ImportError:
        print("Please install NLTK. " "See the docs at http://nltk.org for more information.")
        raise

    def as_example(node):
        # One output record for a single tree node.
        return {
            'text': ' '.join(node.leaves()),
            'label': get_label_str(node.label(), fine_grained=fine_grained)
        }

    root = Tree.fromstring(data)
    if not subtrees:
        return as_example(root)
    return [as_example(node) for node in root.subtrees()]

Example #6

Source File: bracket_parse.py From razzy-spinner with GNU General Public License v3.0

6 votes

def _parse(self, t):
        """
        Parse the bracketed string ``t`` into a ``Tree``, trying to recover
        from malformed input.

        The normalized string is parsed with ``Tree.fromstring``.  If that
        raises ``ValueError``, a diagnostic is written to stderr and, when
        the error arguments are exactly ``('mismatched parens',)``, parsing
        is retried with one to four closing parentheses appended.  If no
        attempt succeeds, a flat tree labelled 'S' over ``self._tag(t)`` is
        returned.
        """
        try:
            return Tree.fromstring(self._normalize(t))

        except ValueError as e:
            sys.stderr.write("Bad tree detected; trying to recover...\n")
            # Try to recover, if we can:
            # NOTE(review): this branch depends on the exact error message
            # carried by the ValueError -- confirm the parser still emits it.
            if e.args == ('mismatched parens',):
                for n in range(1, 5):
                    try:
                        # Re-parse with fromstring(), like the primary attempt:
                        # the Tree constructor takes a label and a child list,
                        # not a bracketed string.
                        v = Tree.fromstring(self._normalize(t+')'*n))
                        sys.stderr.write("  Recovered by adding %d close "
                                         "paren(s)\n" % n)
                        return v
                    except ValueError:
                        # Still unbalanced; try one more closing paren.
                        pass
            # Try something else:
            sys.stderr.write("  Recovered by returning a flat parse.\n")
            return Tree('S', self._tag(t))

Example #7

Source File: bracket_parse.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International

6 votes

def _parse(self, t):
        """
        Parse the bracketed string ``t`` into a ``Tree``, trying to recover
        from malformed input.

        The normalized string is parsed with ``Tree.fromstring``.  If that
        raises ``ValueError``, a diagnostic is written to stderr and, when
        the error arguments are exactly ``('mismatched parens',)``, parsing
        is retried with one to four closing parentheses appended.  If no
        attempt succeeds, a flat tree labelled 'S' over ``self._tag(t)`` is
        returned.
        """
        try:
            return Tree.fromstring(self._normalize(t))

        except ValueError as e:
            sys.stderr.write("Bad tree detected; trying to recover...\n")
            # Try to recover, if we can:
            # NOTE(review): this branch depends on the exact error message
            # carried by the ValueError -- confirm the parser still emits it.
            if e.args == ('mismatched parens',):
                for n in range(1, 5):
                    try:
                        # Re-parse with fromstring(), like the primary attempt:
                        # the Tree constructor takes a label and a child list,
                        # not a bracketed string.
                        v = Tree.fromstring(self._normalize(t + ')' * n))
                        sys.stderr.write(
                            "  Recovered by adding %d close " "paren(s)\n" % n
                        )
                        return v
                    except ValueError:
                        # Still unbalanced; try one more closing paren.
                        pass
            # Try something else:
            sys.stderr.write("  Recovered by returning a flat parse.\n")
            return Tree('S', self._tag(t))

Example #8

Source File: corenlp.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International

5 votes

def make_tree(self, result):
        """Build a Tree from the bracketed string stored under ``result['parse']``."""
        tree = Tree.fromstring(result['parse'])
        return tree

Example #9

Source File: stanford.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International

5 votes

def _make_tree(self, result):
        """Build a Tree from the bracketed string ``result``."""
        tree = Tree.fromstring(result)
        return tree

Example #10

Source File: bllip.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International

5 votes

def _scored_parse_to_nltk_tree(scored_parse):
    """Convert ``scored_parse`` to a Tree by parsing the string form of its ``ptb_parse`` attribute."""
    bracketed = str(scored_parse.ptb_parse)
    return Tree.fromstring(bracketed)

Example #11

Source File: treeprettyprinter.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International

5 votes

def test():
    """Draw a handful of treebank trees and one discontinuous tree as a smoke test."""

    def print_tree(n, tree, sentence=None, ansi=True, **xargs):
        # Header and bracketed form first, then the drawing itself.
        print()
        words = sentence or tree.leaves()
        print('{0}: "{1}"'.format(n, ' '.join(words)))
        print(tree)
        print()
        printer = TreePrettyPrinter(tree, sentence)
        try:
            print(printer.text(unicodelines=ansi, ansi=ansi, **xargs))
        except (UnicodeDecodeError, UnicodeEncodeError):
            # Fall back to plain ASCII output when the unicode drawing
            # cannot be encoded or decoded.
            print(printer.text(unicodelines=False, ansi=False, **xargs))

    from nltk.corpus import treebank

    for n in (0, 1440, 1591, 2771, 2170):
        tree = treebank.parsed_sents()[n]
        print_tree(n, tree, nodedist=2, maxwidth=8)
    print()
    print('ASCII version:')
    # ``tree`` is still the last treebank tree from the loop above.
    ascii_printer = TreePrettyPrinter(tree)
    print(ascii_printer.text(nodedist=2))

    # Leaves are integer positions into ``sentence`` (hence read_leaf=int).
    bracketed = (
        '(top (punct 8) (smain (noun 0) (verb 1) (inf (verb 5) (inf (verb 6) '
        '(conj (inf (pp (prep 2) (np (det 3) (noun 4))) (verb 7)) (inf (verb 9)) '
        '(vg 10) (inf (verb 11)))))) (punct 12))'
    )
    tree = Tree.fromstring(bracketed, read_leaf=int)
    sentence_text = (
        'Ze had met haar moeder kunnen gaan winkelen ,'
        ' zwemmen of terrassen .'
    )
    sentence = sentence_text.split()
    print_tree('Discontinuous tree', tree, sentence, nodedist=2)

Example #12

Source File: tree.py From DPLP with MIT License

5 votes

def parse(self):
        """ Return the result of ``getparse`` applied to ``self.tree``,
            starting from an empty string.

            To visualize the returned parse with nltk.tree:
            from nltk.tree import Tree
            t = Tree.fromstring(parse)
            t.draw()
        """
        return getparse(self.tree, "")

Example #13

Source File: parser.py From Lango with GNU General Public License v2.0

5 votes

def _make_tree(self, result):
        """Build a Tree from the bracketed string ``result``."""
        tree = Tree.fromstring(result)
        return tree

Example #14

Source File: data_helpers.py From acl2017-interactive_summarizer with Apache License 2.0

5 votes

def get_parse_info(parsestr, stemmer, language, stoplist):
    """
    Extract token/POS information and chunked phrases from a bracketed parse.

    :param parsestr: bracketed parse tree string, read with ``Tree.fromstring``.
    :param stemmer: object whose ``stem(word)`` result keys the token map.
    :param language: ``'german'`` or ``'english'``; selects the chunk grammar.
    :param stoplist: passed through to ``get_terms``.
    :return: ``(hash_token_pos, phrase_list)`` -- an ordered mapping from
        stemmed word to ``"word::POS"`` and a list of space-joined phrases.
    :raises ValueError: if ``language`` is not one of the supported values.
    """
    if language == 'german':
        grammar = r"""
            NBAR:
            {<N.*|ADJ.*>*<N.*>}  # Nouns and Adjectives, terminated with Nouns
            VP:
            {<V.*>}  # terminated with Verbs
            NP:
            {<NBAR>}
            {<NBAR><APPR><NBAR>}  # Above, connected with in/of/etc...
        """
    elif language == 'english':
        # Taken from Su Nam Kim Paper...
        grammar = r"""
            NBAR:
            {<NN.*|JJ>*<NN.*>}  # Nouns and Adjectives, terminated with Nouns
            VP:
            {<V.*>}  # terminated with Verbs
            NP:
            {<NBAR>}
            {<NBAR><IN><NBAR>}  # Above, connected with in/of/etc...
        """
    else:
        # Fail early with a clear message instead of hitting an unbound
        # ``grammar`` variable further down.
        raise ValueError("Unsupported language: {!r}".format(language))

    chunker = RegexpParser(grammar)

    hash_token_pos = OrderedDict()
    postoks = []
    for subtree in Tree.fromstring(parsestr).subtrees():
        # Only subtrees of height 2 are used; their first child is taken
        # as the word and their label as its POS tag.
        if subtree.height() == 2:
            word, pos = subtree[0], subtree.label()
            hash_token_pos[stemmer.stem(word)] = word + u"::" + pos
            postoks.append((word, pos))

    chunk_tree = chunker.parse(postoks)
    phrases = get_terms(chunk_tree, stemmer, stoplist)
    phrase_list = [' '.join(term) for term in phrases if term]
    return hash_token_pos, phrase_list

Example #15

Source File: example.py From pytorch-nlp with MIT License

5 votes

def fromtree(cls, data, fields, subtrees=False):
        """
        Build example(s) from a bracketed tree string via ``cls.fromlist``.

        Each example is created from the space-joined leaves and the label
        of a tree node.  With ``subtrees`` true, a list with one example per
        subtree is returned; otherwise a single example for the whole tree.
        NLTK is imported lazily; a hint is printed and the ImportError
        re-raised if it is missing.
        """
        try:
            from nltk.tree import Tree
        except ImportError:
            print("Please install NLTK. "
                  "See the docs at http://nltk.org for more information.")
            raise

        def to_example(node):
            # Text/label pair for one node, handed to the regular list constructor.
            return cls.fromlist([' '.join(node.leaves()), node.label()], fields)

        root = Tree.fromstring(data)
        if not subtrees:
            return to_example(root)
        return [to_example(node) for node in root.subtrees()]

Example #16

Source File: test_utils.py From pytorch-rnng with MIT License

5 votes

def test_add_dummy_pos():
    """add_dummy_pos wraps every bare leaf in an XX preterminal."""
    tree = Tree.fromstring('(S (NP John) (VP loves (NP Mary)))')
    converted = add_dummy_pos(tree)

    assert str(converted) == '(S (NP (XX John)) (VP (XX loves) (NP (XX Mary))))'

Example #17

Source File: get_np_terminals.py From 4lang with MIT License

5 votes

def get_np_terminals():
    """
    Print one TEMPLATE-formatted line for every distinct entry returned by
    ``Tree.pos()`` across the trees in the file named by ``sys.argv[1]``.

    The file is read one bracketed tree per line.  Each distinct pair is
    printed once, with its second element passed to ``TEMPLATE.format``
    before its first.  Output order follows set iteration order.
    """
    seen = set()
    with open(sys.argv[1]) as trees:
        for line in trees:
            # set.update() de-duplicates, so no membership test is needed.
            seen.update(Tree.fromstring(line).pos())

    for first, second in seen:
        # print() as a function with a single argument works on both
        # Python 2 and Python 3; the print statement is Python 2 only.
        print(TEMPLATE.format(second, first))

Example #18

Source File: pdtb2.py From Deep_Enhanced_Repr_for_IDRR with MIT License

5 votes

def __process_trees(self, s):
        """
        Input:
        a string of Penn parse trees separated by |||

        Value:
        A list of NLTK Tree objects, one per separated piece
        (an empty list when the input is empty).
        """
        if not s:
            return []
        return [Tree.fromstring(tree_str) for tree_str in s.split("|||")]

######################################################################

Example #19

Source File: example.py From text with BSD 3-Clause "New" or "Revised" License

5 votes

def fromtree(cls, data, fields, subtrees=False):
        """
        Create example(s) from a bracketed tree string using ``cls.fromlist``.

        The text of an example is the space-joined leaves of a node and its
        label is the node label.  A single example is returned for the whole
        tree unless ``subtrees`` is true, in which case a list with one
        example per subtree is returned.  If NLTK cannot be imported, a hint
        is printed and the ImportError propagates.
        """
        try:
            from nltk.tree import Tree
        except ImportError:
            print("Please install NLTK. "
                  "See the docs at http://nltk.org for more information.")
            raise

        def example_for(node):
            pair = [' '.join(node.leaves()), node.label()]
            return cls.fromlist(pair, fields)

        parsed = Tree.fromstring(data)
        if subtrees:
            return [example_for(node) for node in parsed.subtrees()]
        return example_for(parsed)

Example #20

Source File: penn_tree_bank_reader_test.py From magnitude with MIT License

5 votes

def test_get_gold_spans_correctly_extracts_spans(self):
        """_get_gold_spans records every constituent span with its label."""
        reader = PennTreeBankConstituencySpanDatasetReader()
        parsed = Tree.fromstring(u"(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")

        collected = {}
        reader._get_gold_spans(parsed, 0, collected)  # pylint: disable=protected-access

        expected = [((0, 1), u'NP'), ((3, 4), u'NP'), ((2, 4), u'VP'), ((0, 4), u'S')]
        assert list(collected.items()) == expected

Example #21

Source File: stanford.py From razzy-spinner with GNU General Public License v3.0

5 votes

def _make_tree(self, result):
        """Build a Tree from the bracketed string ``result``."""
        tree = Tree.fromstring(result)
        return tree

Example #22

Source File: bllip.py From razzy-spinner with GNU General Public License v3.0

5 votes

def _scored_parse_to_nltk_tree(scored_parse):
    """Convert ``scored_parse`` to a Tree by parsing the string form of its ``ptb_parse`` attribute."""
    bracketed = str(scored_parse.ptb_parse)
    return Tree.fromstring(bracketed)

Example #23

Source File: example.py From decaNLP with BSD 3-Clause "New" or "Revised" License

5 votes

def fromtree(cls, data, fields, subtrees=False):
        """
        Construct example(s) from a bracketed tree string through ``cls.fromlist``.

        For a node, the example text is its leaves joined by single spaces
        and the example label is the node's label.  Returns one example for
        the whole tree, or a list covering every subtree when ``subtrees``
        is true.  A missing NLTK installation prints a hint and re-raises
        the ImportError.
        """
        try:
            from nltk.tree import Tree
        except ImportError:
            print("Please install NLTK. "
                  "See the docs at http://nltk.org for more information.")
            raise

        def build(node):
            text = ' '.join(node.leaves())
            return cls.fromlist([text, node.label()], fields)

        whole = Tree.fromstring(data)
        if not subtrees:
            return build(whole)
        return [build(node) for node in whole.subtrees()]

Example #24

Source File: pdtb2.py From pdtb2 with GNU General Public License v2.0

5 votes

def __process_trees(self, s):
        """
        Input:
        a string of Penn parse trees separated by |||

        Value:
        A list of NLTK Tree objects, one per separated piece
        (an empty list when the input is empty).
        """
        if not s:
            return []
        pieces = s.split("|||")
        return [Tree.fromstring(piece) for piece in pieces]

######################################################################

Example #25

Source File: convert2graph.py From structured-neural-summarization with MIT License

5 votes

def parse_tree_to_sentence(parse_tree: str) -> List[str]:
    """Return the leaves of the tree parsed from the bracketed string ``parse_tree``."""
    tree = Tree.fromstring(parse_tree)
    return tree.leaves()

Example #26

Source File: penn_tree_bank_reader_test.py From magnitude with MIT License

5 votes

def test_strip_functional_tags(self):
        """_strip_functional_tags reduces decorated labels to their bare category in place."""
        reader = PennTreeBankConstituencySpanDatasetReader()
        # Labels carry '=', '-' and '|' decorations that should all be stripped.
        tagged = Tree.fromstring(u"(S (NP=PRP (D the) (N dog)) (VP-0 (V chased) (NP|FUN-TAGS (D the) (N cat))))")
        reader._strip_functional_tags(tagged)

        expected = Tree.fromstring(u"(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
        assert tagged == expected

Example #27

Source File: format_tree.py From 4lang with MIT License

5 votes

def format_tree():
    """Parse each line of the file named by ``sys.argv[1]`` as a tree and print it via ``print_tree``, followed by a blank line."""
    with open(sys.argv[1]) as np_doc:
        for raw_line in np_doc:
            tree = Tree.fromstring(raw_line)
            print_tree(tree)
            print()

Example #28

Source File: preprocessing.py From camr with GNU General Public License v2.0

5 votes

def _load_cparse(cparse_filename):
    '''
    Read constituent parse trees from a UTF-8 file, one bracketed tree per
    line, and return them as a list of Tree objects in file order.
    '''
    from nltk.tree import Tree
    with codecs.open(cparse_filename, 'r', encoding='utf-8') as cf:
        # Surrounding whitespace (including the newline) is stripped before parsing.
        return [Tree.fromstring(raw_line.strip()) for raw_line in cf]

Example #29

Source File: fix_multi_sent.py From camr with GNU General Public License v2.0

5 votes

def fix_multi_sent(line):
    """
    Return the tree parsed from ``line`` as a single-line string, wrapping
    multi-child roots first.

    If the parsed tree has more than one child, its children are placed
    under a new 'S' node inside a new 'S1' root; otherwise the tree is used
    unchanged.  In the string form, every newline plus the whitespace that
    follows it is collapsed to one space.
    """
    tree = Tree.fromstring(line)
    if len(tree) > 1:
        newtree = Tree('S1', [Tree('S', tree[:])])
    else:
        newtree = tree
    # Raw string: '\s' in a normal literal is an invalid escape sequence.
    return re.sub(r'\n\s*', ' ', str(newtree))

Example #30

Source File: sort_nps_depth.py From 4lang with MIT License

5 votes

def sort_nps():
    """For each line of the file named by ``sys.argv[1]``, print the height of the tree it parses to, followed by the line itself."""
    with open(sys.argv[1]) as np_doc:
        for raw_line in np_doc:
            depth = Tree.fromstring(raw_line).height()
            # The line keeps its own newline, so print() must not add another.
            print(depth, raw_line, end="")