Python nltk.tree.Tree.fromstring() Examples

The following are 30 code examples of nltk.tree.Tree.fromstring(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module nltk.tree, or try the search function.
Example #1
Source File: discourse_parsing.py    From discourse-parsing with MIT License 6 votes vote down vote up
def initialize_edu_data(edus):
        '''
        Build one feature dictionary per EDU in the input list.

        Each EDU is iterated as a sequence of items whose element 0 is the
        word and element 1 is its POS tag. The dictionaries are returned in
        input order.
        '''
        edu_items = []
        position = 0  # running counter used for the distance features
        for edu_index, edu in enumerate(edus):
            # A bare "text" tree whose only child is the EDU index as a string.
            leaf_tree = Tree.fromstring('(text)')
            leaf_tree.append('{}'.format(edu_index))

            edu_items.append({
                "head_idx": position,
                "start_idx": position,
                "end_idx": position,
                "nt": "text",
                # words are lowercased, tags are kept as given
                "head": [token[0].lower() for token in edu],
                "hpos": [token[1] for token in edu],
                "tree": leaf_tree,
            })
            position += 1
        return edu_items
Example #2
Source File: test_oracle.py    From pytorch-rnng with MIT License 6 votes vote down vote up
def test_from_tree(self):
        """Check the DiscOracle built from a small bracketed tree."""
        bracketed = '(S (NP (NNP John)) (VP (VBZ loves) (NP (NNP Mary))))'
        # Expected action sequence for the tree above.
        want_actions = [
            NT('S'),
            NT('NP'), SHIFT, REDUCE,
            NT('VP'), SHIFT,
            NT('NP'), SHIFT, REDUCE,
            REDUCE,
            REDUCE,
        ]
        want_pos_tags = ['NNP', 'VBZ', 'NNP']
        want_words = ['John', 'loves', 'Mary']

        result = DiscOracle.from_tree(Tree.fromstring(bracketed))

        assert isinstance(result, DiscOracle)
        assert result.actions == want_actions
        assert result.pos_tags == want_pos_tags
        assert result.words == want_words
Example #3
Source File: test_oracle.py    From pytorch-rnng with MIT License 6 votes vote down vote up
def test_from_tree(self):
        """Check the GenOracle built from a small bracketed tree."""
        bracketed = '(S (NP (NNP John)) (VP (VBZ loves) (NP (NNP Mary))))'
        # Expected action sequence for the tree above.
        want_actions = [
            NT('S'),
            NT('NP'), GEN('John'), REDUCE,
            NT('VP'), GEN('loves'),
            NT('NP'), GEN('Mary'), REDUCE,
            REDUCE,
            REDUCE,
        ]
        want_words = ['John', 'loves', 'Mary']
        want_pos_tags = ['NNP', 'VBZ', 'NNP']

        result = GenOracle.from_tree(Tree.fromstring(bracketed))

        assert isinstance(result, GenOracle)
        assert result.actions == want_actions
        assert result.words == want_words
        assert result.pos_tags == want_pos_tags
Example #4
Source File: stanford_sentiment_tree_bank.py    From magnitude with MIT License 6 votes vote down vote up
def _read(self, file_path):
        """
        Yield instances built from the bracketed trees in ``file_path``.

        The file is read one line at a time; empty lines are skipped. Each
        remaining line is parsed with ``Tree.fromstring``. When
        ``self._use_subtrees`` is true an instance is produced for every
        subtree, otherwise only for the whole tree. Results of
        ``self.text_to_instance`` that are ``None`` are not yielded.
        """
        with open(cached_path(file_path), u"r") as data_file:
            logger.info(u"Reading instances from lines in file at: %s", file_path)
            # Iterate the file object directly so lines are streamed rather
            # than materialised all at once with readlines().
            for raw_line in data_file:
                line = raw_line.strip(u"\n")
                if not line:
                    continue
                parsed_line = Tree.fromstring(line)
                if self._use_subtrees:
                    candidates = parsed_line.subtrees()
                else:
                    candidates = [parsed_line]
                for node in candidates:
                    instance = self.text_to_instance(node.leaves(), node.label())
                    if instance is not None:
                        yield instance

    #overrides 
Example #5
Source File: smt.py    From PyTorch-NLP with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def parse_tree(data, subtrees=False, fine_grained=False):
    """
    Convert a bracketed tree string into text/label dictionaries.

    Returns a single ``{'text', 'label'}`` dict for the whole tree, or a list
    with one such dict per subtree when ``subtrees`` is true. Labels are passed
    through ``get_label_str`` together with ``fine_grained``.
    """
    # https://github.com/pytorch/text/blob/6476392a801f51794c90378dd23489578896c6f2/torchtext/data/example.py#L56
    try:
        from nltk.tree import Tree
    except ImportError:
        print("Please install NLTK. " "See the docs at http://nltk.org for more information.")
        raise

    def as_example(node):
        # One output record: space-joined leaves plus the converted label.
        return {
            'text': ' '.join(node.leaves()),
            'label': get_label_str(node.label(), fine_grained=fine_grained),
        }

    root = Tree.fromstring(data)
    if not subtrees:
        return as_example(root)
    return [as_example(node) for node in root.subtrees()]
Example #6
Source File: bracket_parse.py    From razzy-spinner with GNU General Public License v3.0 6 votes vote down vote up
def _parse(self, t):
        """
        Parse the bracketed string ``t`` into a Tree, recovering if possible.

        The normalized string is parsed with ``Tree.fromstring``. On a
        ``ValueError`` a message is written to stderr and, if the error is
        ``'mismatched parens'``, parsing is retried with one to four extra
        closing parentheses appended. If that does not succeed either, a flat
        ``S`` tree over ``self._tag(t)`` is returned.
        """
        try:
            return Tree.fromstring(self._normalize(t))

        except ValueError as e:
            sys.stderr.write("Bad tree detected; trying to recover...\n")
            # Try to recover, if we can:
            if e.args == ('mismatched parens',):
                for n in range(1, 5):
                    try:
                        # Must use fromstring(): the Tree constructor takes a
                        # label and a child list, not a bracketed string.
                        v = Tree.fromstring(self._normalize(t + ')' * n))
                    except ValueError:
                        continue
                    sys.stderr.write("  Recovered by adding %d close "
                                     "paren(s)\n" % n)
                    return v
            # Try something else:
            sys.stderr.write("  Recovered by returning a flat parse.\n")
            return Tree('S', self._tag(t))
Example #7
Source File: bracket_parse.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International 6 votes vote down vote up
def _parse(self, t):
        """
        Parse the bracketed string ``t`` into a Tree, recovering if possible.

        The normalized string is parsed with ``Tree.fromstring``. On a
        ``ValueError`` a message is written to stderr and, if the error is
        ``'mismatched parens'``, parsing is retried with one to four extra
        closing parentheses appended. If that does not succeed either, a flat
        ``S`` tree over ``self._tag(t)`` is returned.
        """
        try:
            return Tree.fromstring(self._normalize(t))

        except ValueError as e:
            sys.stderr.write("Bad tree detected; trying to recover...\n")
            # Try to recover, if we can:
            if e.args == ('mismatched parens',):
                for n in range(1, 5):
                    try:
                        # Must use fromstring(): the Tree constructor takes a
                        # label and a child list, not a bracketed string.
                        v = Tree.fromstring(self._normalize(t + ')' * n))
                    except ValueError:
                        continue
                    sys.stderr.write(
                        "  Recovered by adding %d close " "paren(s)\n" % n
                    )
                    return v
            # Try something else:
            sys.stderr.write("  Recovered by returning a flat parse.\n")
            return Tree('S', self._tag(t))
Example #8
Source File: corenlp.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International 5 votes vote down vote up
def make_tree(self, result):
        """Build a Tree from the bracketed string stored under ``result['parse']``."""
        bracketed = result['parse']
        return Tree.fromstring(bracketed)
Example #9
Source File: stanford.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International 5 votes vote down vote up
def _make_tree(self, result):
        """Build a Tree by reading ``result`` with ``Tree.fromstring``."""
        tree = Tree.fromstring(result)
        return tree
Example #10
Source File: bllip.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International 5 votes vote down vote up
def _scored_parse_to_nltk_tree(scored_parse):
    """Build a Tree from the string form of ``scored_parse.ptb_parse``."""
    bracketed = str(scored_parse.ptb_parse)
    return Tree.fromstring(bracketed)
Example #11
Source File: treeprettyprinter.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International 5 votes vote down vote up
def test():
    """Draw a few treebank sentences and one hand-written tree as a demo."""

    def print_tree(n, tree, sentence=None, ansi=True, **xargs):
        # Header line, the tree's plain printed form, then the drawing.
        print()
        words = sentence or tree.leaves()
        print('{0}: "{1}"'.format(n, ' '.join(words)))
        print(tree)
        print()
        drawtree = TreePrettyPrinter(tree, sentence)
        try:
            print(drawtree.text(unicodelines=ansi, ansi=ansi, **xargs))
        except (UnicodeDecodeError, UnicodeEncodeError):
            # Retry with both unicode lines and ANSI output switched off.
            print(drawtree.text(unicodelines=False, ansi=False, **xargs))

    from nltk.corpus import treebank

    for sent_no in [0, 1440, 1591, 2771, 2170]:
        tree = treebank.parsed_sents()[sent_no]
        print_tree(sent_no, tree, nodedist=2, maxwidth=8)
    print()
    print('ASCII version:')
    # `tree` is still the last treebank sentence drawn by the loop above.
    print(TreePrettyPrinter(tree).text(nodedist=2))

    # Leaves are read as ints (read_leaf=int) and paired with the word list below.
    bracketed = (
        '(top (punct 8) (smain (noun 0) (verb 1) (inf (verb 5) (inf (verb 6) '
        '(conj (inf (pp (prep 2) (np (det 3) (noun 4))) (verb 7)) (inf (verb 9)) '
        '(vg 10) (inf (verb 11)))))) (punct 12))'
    )
    tree = Tree.fromstring(bracketed, read_leaf=int)
    sentence = (
        'Ze had met haar moeder kunnen gaan winkelen ,'
        ' zwemmen of terrassen .'
    ).split()
    print_tree('Discontinuous tree', tree, sentence, nodedist=2)
Example #12
Source File: tree.py    From DPLP with MIT License 5 votes vote down vote up
def parse(self):
        """ Return the parse tree in string format, as built by getparse.

            To visualize the result with nltk.tree:
            from nltk.tree import Tree
            t = Tree.fromstring(parse)
            t.draw()
        """
        return getparse(self.tree, "")
Example #13
Source File: parser.py    From Lango with GNU General Public License v2.0 5 votes vote down vote up
def _make_tree(self, result):
        """Build a Tree by reading ``result`` with ``Tree.fromstring``."""
        tree = Tree.fromstring(result)
        return tree
Example #14
Source File: data_helpers.py    From acl2017-interactive_summarizer with Apache License 2.0 5 votes vote down vote up
def get_parse_info(parsestr, stemmer, language, stoplist):
    """
    Extract POS-tagged tokens and chunked phrases from a bracketed parse.

    Args:
        parsestr: bracketed parse string, read with ``Tree.fromstring``.
        stemmer: object whose ``stem(word)`` result keys the token mapping.
        language: ``'german'`` or ``'english'``; selects the chunk grammar.
        stoplist: passed through unchanged to ``get_terms``.

    Returns:
        A pair ``(hash_token_pos, phrase_list)``: an ``OrderedDict`` mapping
        each stemmed word to ``"word::pos"``, and a list of space-joined,
        non-empty terms returned by ``get_terms``.

    Raises:
        ValueError: if ``language`` is not one of the supported values.
    """
    # Choose the grammar first so an unsupported language fails with a clear
    # error instead of an unbound ``grammar`` variable further down.
    if language == 'german':
        grammar = r"""
            NBAR:
            {<N.*|ADJ.*>*<N.*>}  # Nouns and Adjectives, terminated with Nouns
            VP:
            {<V.*>}  # terminated with Verbs
            NP:
            {<NBAR>}
            {<NBAR><APPR><NBAR>}  # Above, connected with in/of/etc...
        """
    elif language == 'english':
        # Taken from Su Nam Kim Paper...
        grammar = r"""
            NBAR:
            {<NN.*|JJ>*<NN.*>}  # Nouns and Adjectives, terminated with Nouns
            VP:
            {<V.*>}  # terminated with Verbs
            NP:
            {<NBAR>}
            {<NBAR><IN><NBAR>}  # Above, connected with in/of/etc...
        """
    else:
        raise ValueError(
            "Unsupported language: {!r} (expected 'german' or 'english')".format(language)
        )

    hash_token_pos = OrderedDict()
    chunker = RegexpParser(grammar)

    postoks = []
    for subtree in Tree.fromstring(parsestr).subtrees():
        # Only subtrees of height 2 are used; their first child is taken as
        # the word and their label as the POS tag.
        if subtree.height() == 2:
            word, pos = subtree[0], subtree.label()
            hash_token_pos[stemmer.stem(word)] = word + u"::" + pos
            postoks.append((word, pos))

    chunk_tree = chunker.parse(postoks)
    phrases = get_terms(chunk_tree, stemmer, stoplist)
    phrase_list = [' '.join(term) for term in phrases if term]
    return hash_token_pos, phrase_list
Example #15
Source File: example.py    From pytorch-nlp with MIT License 5 votes vote down vote up
def fromtree(cls, data, fields, subtrees=False):
        """
        Build example(s) from a bracketed tree string via ``cls.fromlist``.

        Each example is made from the space-joined leaves and the label of a
        tree node. Returns one example for the whole tree, or a list with one
        example per subtree when ``subtrees`` is true.
        """
        try:
            from nltk.tree import Tree
        except ImportError:
            print("Please install NLTK. "
                  "See the docs at http://nltk.org for more information.")
            raise

        def to_example(node):
            return cls.fromlist([' '.join(node.leaves()), node.label()], fields)

        root = Tree.fromstring(data)
        if not subtrees:
            return to_example(root)
        return [to_example(node) for node in root.subtrees()]
Example #16
Source File: test_utils.py    From pytorch-rnng with MIT License 5 votes vote down vote up
def test_add_dummy_pos():
    """add_dummy_pos should wrap each leaf word in an (XX ...) node."""
    source = '(S (NP John) (VP loves (NP Mary)))'
    want = '(S (NP (XX John)) (VP (XX loves) (NP (XX Mary))))'

    got = str(add_dummy_pos(Tree.fromstring(source)))

    assert got == want
Example #17
Source File: get_np_terminals.py    From 4lang with MIT License 5 votes vote down vote up
def get_np_terminals():
    """
    Print one TEMPLATE line for every distinct pair returned by Tree.pos().

    Reads the file named by ``sys.argv[1]`` line by line, parses each line
    with ``Tree.fromstring`` and collects the pairs from ``tree.pos()`` in a
    set. Each pair is then printed through ``TEMPLATE.format`` with its second
    element first. Output order follows set iteration order.
    """
    seen = set()
    with open(sys.argv[1]) as trees:
        for line in trees:
            # set.update() already ignores duplicates; no membership test needed.
            seen.update(Tree.fromstring(line).pos())

    for first, second in seen:
        # Call form of print so the function is also valid on Python 3.
        print(TEMPLATE.format(second, first))
Example #18
Source File: pdtb2.py    From Deep_Enhanced_Repr_for_IDRR with MIT License 5 votes vote down vote up
def __process_trees(self, s):
        """
        Split ``s`` on "|||" and parse every piece with Tree.fromstring.

        Input
        a string representing Penn parsetrees, delimited by |||

        Value:
        A list of NLTK Tree objects (an empty list when ``s`` is falsy).
        """
        if s:
            return [Tree.fromstring(tree_str) for tree_str in s.split("|||")]
        return []

###################################################################### 
Example #19
Source File: example.py    From text with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def fromtree(cls, data, fields, subtrees=False):
        """
        Build example(s) from a bracketed tree string via ``cls.fromlist``.

        Each example is made from the space-joined leaves and the label of a
        tree node. Returns one example for the whole tree, or a list with one
        example per subtree when ``subtrees`` is true.
        """
        try:
            from nltk.tree import Tree
        except ImportError:
            print("Please install NLTK. "
                  "See the docs at http://nltk.org for more information.")
            raise

        def to_example(node):
            return cls.fromlist([' '.join(node.leaves()), node.label()], fields)

        root = Tree.fromstring(data)
        if not subtrees:
            return to_example(root)
        return [to_example(node) for node in root.subtrees()]
Example #20
Source File: penn_tree_bank_reader_test.py    From magnitude with MIT License 5 votes vote down vote up
def test_get_gold_spans_correctly_extracts_spans(self):
        """_get_gold_spans should fill the dict with the expected labelled spans."""
        reader = PennTreeBankConstituencySpanDatasetReader()
        tree = Tree.fromstring(u"(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")

        gold_spans = {}
        reader._get_gold_spans(tree, 0, gold_spans)  # pylint: disable=protected-access
        want = [((0, 1), u'NP'), ((3, 4), u'NP'), ((2, 4), u'VP'), ((0, 4), u'S')]
        assert list(gold_spans.items()) == want
Example #21
Source File: stanford.py    From razzy-spinner with GNU General Public License v3.0 5 votes vote down vote up
def _make_tree(self, result):
        """Build a Tree by reading ``result`` with ``Tree.fromstring``."""
        tree = Tree.fromstring(result)
        return tree
Example #22
Source File: bllip.py    From razzy-spinner with GNU General Public License v3.0 5 votes vote down vote up
def _scored_parse_to_nltk_tree(scored_parse):
    """Build a Tree from the string form of ``scored_parse.ptb_parse``."""
    bracketed = str(scored_parse.ptb_parse)
    return Tree.fromstring(bracketed)
Example #23
Source File: example.py    From decaNLP with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def fromtree(cls, data, fields, subtrees=False):
        """
        Build example(s) from a bracketed tree string via ``cls.fromlist``.

        Each example is made from the space-joined leaves and the label of a
        tree node. Returns one example for the whole tree, or a list with one
        example per subtree when ``subtrees`` is true.
        """
        try:
            from nltk.tree import Tree
        except ImportError:
            print("Please install NLTK. "
                  "See the docs at http://nltk.org for more information.")
            raise

        def to_example(node):
            return cls.fromlist([' '.join(node.leaves()), node.label()], fields)

        root = Tree.fromstring(data)
        if not subtrees:
            return to_example(root)
        return [to_example(node) for node in root.subtrees()]
Example #24
Source File: pdtb2.py    From pdtb2 with GNU General Public License v2.0 5 votes vote down vote up
def __process_trees(self, s):
        """
        Split ``s`` on "|||" and parse every piece with Tree.fromstring.

        Input
        a string representing Penn parsetrees, delimited by |||

        Value:
        A list of NLTK Tree objects (an empty list when ``s`` is falsy).
        """
        if s:
            return [Tree.fromstring(tree_str) for tree_str in s.split("|||")]
        return []

###################################################################### 
Example #25
Source File: convert2graph.py    From structured-neural-summarization with MIT License 5 votes vote down vote up
def parse_tree_to_sentence(parse_tree: str) -> List[str]:
    """Return the leaves of the tree read from the bracketed ``parse_tree`` string."""
    tree = Tree.fromstring(parse_tree)
    return tree.leaves()
Example #26
Source File: penn_tree_bank_reader_test.py    From magnitude with MIT License 5 votes vote down vote up
def test_strip_functional_tags(self):
        """_strip_functional_tags should reduce decorated labels to their bare form in place."""
        reader = PennTreeBankConstituencySpanDatasetReader()
        # Labels here carry "=PRP", "-0" and "|FUN-TAGS" suffixes that should be removed.
        tagged = Tree.fromstring(u"(S (NP=PRP (D the) (N dog)) (VP-0 (V chased) (NP|FUN-TAGS (D the) (N cat))))")
        reader._strip_functional_tags(tagged)
        plain = Tree.fromstring(u"(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
        assert tagged == plain
Example #27
Source File: format_tree.py    From 4lang with MIT License 5 votes vote down vote up
def format_tree():
    """Pass each tree in the file named by sys.argv[1] to print_tree, then print a blank line."""
    with open(sys.argv[1]) as np_doc:
        for bracketed in np_doc:
            print_tree(Tree.fromstring(bracketed))
            print()
Example #28
Source File: preprocessing.py    From camr with GNU General Public License v2.0 5 votes vote down vote up
def _load_cparse(cparse_filename):
    '''
    Load the constituent parse trees from a UTF-8 file, one tree per line.

    Each line is stripped and read with Tree.fromstring; the trees are
    returned as a list in file order.
    '''
    from nltk.tree import Tree
    with codecs.open(cparse_filename,'r',encoding='utf-8') as cf:
        return [Tree.fromstring(raw.strip()) for raw in cf]
Example #29
Source File: fix_multi_sent.py    From camr with GNU General Public License v2.0 5 votes vote down vote up
def fix_multi_sent(line):
    """
    Return the tree in ``line`` as a single-line string, regrouping if needed.

    ``line`` is read with ``Tree.fromstring``. If the resulting tree has more
    than one child, all of its children are placed under a new ``S`` node,
    which becomes the only child of a new ``S1`` root. Otherwise the tree is
    used as is. In the tree's string form, every newline together with the
    whitespace that follows it is replaced by a single space.
    """
    tree = Tree.fromstring(line)
    if len(tree) > 1:
        newtree = Tree('S1', [Tree('S', tree[:])])
    else:
        newtree = tree
    # Raw string: '\s' inside a plain literal is an invalid escape sequence.
    return re.sub(r'\n\s*', ' ', str(newtree))
Example #30
Source File: sort_nps_depth.py    From 4lang with MIT License 5 votes vote down vote up
def sort_nps():
    """Print each line of the file named by sys.argv[1], prefixed with its tree's height.

    No sorting is done here; the lines are printed in file order.
    """
    with open(sys.argv[1]) as np_doc:
        for bracketed in np_doc:
            height = Tree.fromstring(bracketed).height()
            print(height, bracketed, end ="")