Python nltk.Tree.fromstring() Examples

The following are 30 code examples of nltk.Tree.fromstring(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module nltk.Tree, or try the search function.
Example #1
Source File: tree.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International 6 votes vote down vote up
def leaves(self):
        """
        Collect every leaf of this tree.

            >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
            >>> t.leaves()
            ['the', 'dog', 'chased', 'the', 'cat']

        :return: the leaves of this tree, in the order in which they
            occur in the tree's hierarchical structure.
        :rtype: list
        """
        collected = []
        for node in self:
            if not isinstance(node, Tree):
                # Any child that is not a Tree is itself a leaf.
                collected.append(node)
                continue
            # Recurse into the subtree and splice in its leaves.
            collected.extend(node.leaves())
        return collected
Example #2
Source File: data_nli.py    From PRPN-Analysis with MIT License 6 votes vote down vote up
def add_words(self, file_name):
        """
        Add the words of every parsed sentence pair in ``file_name`` to the dictionary.

        Each non-blank line is evaluated into a mapping that must provide the
        keys ``'sentence1_parse'`` and ``'sentence2_parse'``.  Each parse is read
        with ``Tree.fromstring``, reduced to words by ``self.filter_words``,
        wrapped in ``'<s>'`` / ``'</s>'`` markers and passed word by word to
        ``self.dictionary.add_word``.

        :param file_name: path of the file to read, one record per line
        :type file_name: str
        """
        # The context manager closes the file even if a line fails to parse.
        with open(file_name, 'r') as f_in:
            for line in f_in:
                if line.strip() == '':
                    continue
                # SECURITY NOTE(review): eval() executes arbitrary code found
                # in the file.  Only run this on trusted data; consider
                # json.loads / ast.literal_eval if the data format allows it.
                data = eval(line)
                for parse_key in ('sentence1_parse', 'sentence2_parse'):
                    sen_tree = Tree.fromstring(data[parse_key])
                    words = ['<s>'] + self.filter_words(sen_tree) + ['</s>']
                    for word in words:
                        self.dictionary.add_word(word)
Example #3
Source File: வெண்பா.py    From pytamil with MIT License 6 votes vote down vote up
def main():
    """
    Parse the bundled input file and write its parse tree, drawn as text, to the output file.

    The input file (``வெண்பாinput.txt``, located next to this module) is lexed
    with ``வெண்பாLexer`` and parsed with ``வெண்பாParser``.  The parse tree is
    converted to a bracketed string, re-read as an NLTK tree and rendered
    with ``TreePrettyPrinter``.  Both the bracketed string and the rendering
    are printed; the rendering is also written to ``வெண்பாoutput.txt``.
    """
    base_dir = os.path.dirname(__file__)
    infilename = os.path.join(base_dir, 'வெண்பாinput.txt')
    outfilename = os.path.join(base_dir, 'வெண்பாoutput.txt')

    # Read with an explicit encoding (matching the one used for the output
    # below) and close the handle deterministically.
    with open(infilename, encoding='utf8') as f:
        data = f.read()

    input_stream = antlr4.InputStream(data)
    lexer = வெண்பாLexer(input_stream)
    stream = CommonTokenStream(lexer)
    parser = வெண்பாParser(stream)
    tree = parser.வெண்பா()

    strtree = Trees.toStringTree(tree, None, parser)
    print(strtree)
    t = nltkTree.fromstring(strtree)
    a = TreePrettyPrinter(t).text()
    print(a)

    with open(outfilename, 'w', encoding='utf8') as f:
        f.write(a)
Example #4
Source File: parse.py    From StageDP with MIT License 6 votes vote down vote up
def main():
    """
    Parse every ``*.edu.txt`` file in the EDU directory and write one ``.parse`` file per input.

    Loads the parser model and the Brown clusters from fixed relative paths,
    annotates each document through a CoreNLP server at
    ``http://localhost:9000``, runs the shift-reduce parser and writes the
    pretty-printed tree to ``<output_dir>/<input basename>.parse``.
    """
    args = parse_args()

    parser = RstParser()
    parser.load('../data/model')

    with gzip.open('../data/resources/bc3200.pickle.gz') as fin:
        print('Load Brown clusters for creating features ...')
        brown_clusters = pickle.load(fin)

    core_nlp = StanfordCoreNLP('http://localhost:9000')

    def annotate(x):
        # A fresh properties dict is built for every request.
        return core_nlp.annotate(x, properties={
            'annotators': 'tokenize,ssplit,pos,lemma,parse,depparse',
            'outputFormat': 'json',
            'ssplit.isOneSentence': True
        })

    # Collect the full list of inputs before producing any output.
    edu_file_list = []
    for fname in os.listdir(args.edu_file_dir):
        if fname.endswith('.edu.txt'):
            edu_file_list.append(os.path.join(args.edu_file_dir, fname))

    for edu_file in edu_file_list:
        print('Parsing {}...'.format(edu_file))
        doc = create_doc_from_edu_file(edu_file, annotate_func=annotate)
        pred_rst = parser.sr_parse(doc, brown_clusters)
        rendered = Tree.fromstring(pred_rst.get_parse()).pformat(margin=150)
        out_path = os.path.join(args.output_dir, os.path.basename(edu_file) + '.parse')
        with open(out_path, 'w') as fout:
            fout.write(rendered)
Example #5
Source File: tree.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International 6 votes vote down vote up
def subtrees(self, filter=None):
        """
        Yield this tree and all of its subtrees, optionally keeping only
        those for which the filter function returns a true value.

            >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
            >>> for s in t.subtrees(lambda t: t.height() == 2):
            ...     print(s)
            (D the)
            (N dog)
            (V chased)
            (D the)
            (N cat)

        :type filter: function
        :param filter: predicate applied to each tree; a falsy ``filter``
            means every tree is yielded
        """
        if filter:
            include_self = filter(self)
        else:
            include_self = True
        if include_self:
            yield self

        for node in self:
            if not isinstance(node, Tree):
                continue
            # Delegate to the child so that it yields itself and its descendants.
            for descendant in node.subtrees(filter):
                yield descendant
Example #6
Source File: tree.py    From razzy-spinner with GNU General Public License v3.0 6 votes vote down vote up
def pos(self):
        """
        Extract the (word, tag) pairs of the tree.

            >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
            >>> t.pos()
            [('the', 'D'), ('dog', 'N'), ('chased', 'V'), ('the', 'D'), ('cat', 'N')]

        :return: a list of tuples, each pairing a leaf with the label of the
            node directly above it (its pre-terminal / part-of-speech tag),
            in the order the leaves occur in the tree.
        :rtype: list(tuple)
        """
        tagged = []
        for node in self:
            if not isinstance(node, Tree):
                # A leaf is tagged with the label of the node that holds it.
                tagged.append((node, self._label))
                continue
            tagged.extend(node.pos())
        return tagged
Example #7
Source File: tree.py    From razzy-spinner with GNU General Public License v3.0 6 votes vote down vote up
def subtrees(self, filter=None):
        """
        Generate this tree followed by all of its subtrees; when a filter
        function is given, only trees it accepts are generated.

            >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
            >>> for s in t.subtrees(lambda t: t.height() == 2):
            ...     print(s)
            (D the)
            (N dog)
            (V chased)
            (D the)
            (N cat)

        :type filter: function
        :param filter: the function used to select trees; if it is falsy,
            no filtering is applied
        """
        accept_self = (not filter) or filter(self)
        if accept_self:
            yield self

        child_trees = (kid for kid in self if isinstance(kid, Tree))
        for kid in child_trees:
            for nested in kid.subtrees(filter):
                yield nested
Example #8
Source File: tree.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International 6 votes vote down vote up
def productions(self):
        """
        Build the grammar productions for the non-terminal nodes of the tree.
        A subtree of the form (P: C1 C2 ... Cn) contributes the production
        P -> C1 C2 ... Cn.

            >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
            >>> t.productions()
            [S -> NP VP, NP -> D N, D -> 'the', N -> 'dog', VP -> V NP, V -> 'chased',
            NP -> D N, D -> 'the', N -> 'cat']

        :raise TypeError: if this node's label is not a string
        :rtype: list(Production)
        """
        node_label = self._label
        if not isinstance(node_label, string_types):
            raise TypeError(
                'Productions can only be generated from trees having node labels that are strings'
            )

        # This node's own production comes first, then those of each subtree in order.
        collected = [Production(Nonterminal(node_label), _child_names(self))]
        for node in self:
            if isinstance(node, Tree):
                collected.extend(node.productions())
        return collected
Example #9
Source File: tree.py    From razzy-spinner with GNU General Public License v3.0 6 votes vote down vote up
def productions(self):
        """
        Return the productions corresponding to the non-terminal nodes of the tree.
        Every subtree shaped like (P: C1 C2 ... Cn) yields a production
        P -> C1 C2 ... Cn.

            >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
            >>> t.productions()
            [S -> NP VP, NP -> D N, D -> 'the', N -> 'dog', VP -> V NP, V -> 'chased',
            NP -> D N, D -> 'the', N -> 'cat']

        :raise TypeError: if this node's label is not a string
        :rtype: list(Production)
        """
        if isinstance(self._label, string_types):
            own_production = Production(Nonterminal(self._label), _child_names(self))
        else:
            raise TypeError('Productions can only be generated from trees having node labels that are strings')

        result = [own_production]
        subtrees_only = [kid for kid in self if isinstance(kid, Tree)]
        for kid in subtrees_only:
            result += kid.productions()
        return result
Example #10
Source File: tree.py    From razzy-spinner with GNU General Public License v3.0 6 votes vote down vote up
def leaves(self):
        """
        Gather the leaves of the tree into a list.

            >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
            >>> t.leaves()
            ['the', 'dog', 'chased', 'the', 'cat']

        :return: a list holding this tree's leaves, ordered as they
            appear in the tree's hierarchical structure.
        :rtype: list
        """
        result = []
        for kid in self:
            # Subtrees contribute all of their leaves; anything else is a leaf.
            if isinstance(kid, Tree):
                result += kid.leaves()
            else:
                result += [kid]
        return result
Example #11
Source File: tree.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International 6 votes vote down vote up
def pos(self):
        """
        Return the pos-tagged words found in the tree.

            >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
            >>> t.pos()
            [('the', 'D'), ('dog', 'N'), ('chased', 'V'), ('the', 'D'), ('cat', 'N')]

        :return: a list of ``(leaf, label)`` tuples, where ``label`` is the
            label of the node that directly contains the leaf.  The order
            follows the order of the leaves in the tree.
        :rtype: list(tuple)
        """
        pairs = []
        for kid in self:
            if isinstance(kid, Tree):
                pairs += kid.pos()
            else:
                pairs += [(kid, self._label)]
        return pairs
Example #12
Source File: tree.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International 5 votes vote down vote up
def sinica_parse(s):
    """
    Convert a Sinica Treebank string into a tree.  The input uses nested
    bracketing, as in this example (X stands for a Chinese character):
    S(goal:NP(Head:Nep:XX)|theme:NP(Head:Nhaa:X)|quantity:Dab:X|Head:VL2:X)#0(PERIODCATEGORY)

    :return: A tree corresponding to the string representation.
    :rtype: Tree
    :param s: The string to be converted
    :type s: str
    """
    # Split on parentheses, bars and spaces, keeping the separators as tokens.
    tokens = re.split(r'([()| ])', s)
    for idx, token in enumerate(tokens):
        if token == '(':
            # Pull the nonterminal inside the parens by swapping it with the
            # opening parenthesis that follows it.
            previous = tokens[idx - 1]
            tokens[idx - 1] = token
            tokens[idx] = previous
        elif ':' in token:
            fields = token.split(':')
            if len(fields) == 2:
                # role:category -> keep only the category (non-terminal)
                tokens[idx] = fields[1]
            else:
                # ...:category:word -> bracketed (category word)
                tokens[idx] = "(" + " ".join(fields[-2:]) + ")"
        elif token == '|':
            tokens[idx] = ''

    return Tree.fromstring(" ".join(tokens), remove_empty_top_bracketing=True)


#    s = re.sub(r'^#[^\s]*\s', '', s)  # remove leading identifier
#    s = re.sub(r'\w+:', '', s)       # remove role tags

#    return s

######################################################################
## Demonstration
###################################################################### 
Example #13
Source File: hobbs.py    From hobbs with MIT License 5 votes vote down vote up
def main(argv):
    """
    Command-line entry point: run the demo or resolve a pronoun in a file of parse trees.

    The argument vector is now read only from ``argv``; it is no longer
    mixed with ``sys.argv``.  Output uses single-argument ``print(...)``
    calls, which behave the same under Python 2 and Python 3.

    :param argv: the argument vector (normally ``sys.argv``).
        ``[prog, "demo"]`` runs ``demo()``.  ``[prog, file, pronoun]`` reads
        one bracketed tree per line from ``file`` and proposes an antecedent
        for ``pronoun``, which is looked up in the last tree.  Any other
        shape prints a usage hint.
    :type argv: list(str)
    """
    if len(argv) == 2 and argv[1] == "demo":
        demo()
        return
    if len(argv) != 3:
        # Also covers a lone non-"demo" argument, which used to print nothing.
        print("Enter the file and the pronoun to resolve.")
        return

    p = ["He", "he", "Him", "him", "She", "she", "Her",
        "her", "It", "it", "They", "they"]
    r = ["Himself", "himself", "Herself", "herself",
        "Itself", "itself", "Themselves", "themselves"]
    fname = argv[1]
    pro = argv[2]
    with open(fname) as f:
        sents = f.readlines()
    trees = [Tree.fromstring(s) for s in sents]
    pos = get_pos(trees[-1], pro)
    pos = pos[:-1]

    if pro in p:
        tree, pos = hobbs(trees, pos)
    elif pro in r:
        tree, pos = resolve_reflexive(trees, pos)
    else:
        # Neither a known pronoun nor a known reflexive: nothing to report.
        return

    for t in trees:
        # Same bytes as the old ``print t, '\n'``: tree, space, blank line.
        print("%s \n" % (t,))
    print("Proposed antecedent for '%s': %s" % (pro, tree[pos]))
Example #14
Source File: easyccg2jigg.py    From ccg2lambda with Apache License 2.0 5 votes vote down vote up
def make_tree(line):
  """Parse one line into a tree.

  The stripped line is first passed through ``substitute_chars``.  If
  ``Tree.fromstring`` rejects the result with a ``ValueError``, a warning
  is logged and ``None`` is returned instead of a tree.
  """
  cleaned = substitute_chars(line.strip())
  try:
    return Tree.fromstring(cleaned)
  except ValueError:
    logging.warning('Failed to Tree parse line: {0}'.format(line))
    return None
Example #15
Source File: tree_tools.py    From ccg2lambda with Apache License 2.0 5 votes vote down vote up
def tree_or_string(s):
    """Turn a string loaded from the yaml into a Tree, or leave it as a string.

    A string that starts with an opening parenthesis is parsed as a tree;
    anything else is treated as a bare terminal and returned unchanged.
    """
    looks_like_tree = s.startswith(u"(")
    if not looks_like_tree:
        return s
    return NLTKTree.fromstring(s)
Example #16
Source File: get_nli_sentences.py    From ccg2lambda with Apache License 2.0 5 votes vote down vote up
def print_tokenized_sentence(problem, sent_prefix):
    """Print the space-joined tokens of one sentence of ``problem``.

    Tokens are the leaves of the parse stored under ``sent_prefix + '_parse'``.
    If reading that parse raises ``ValueError``, the raw sentence stored under
    ``sent_prefix`` is whitespace-split instead (a missing sentence gives no
    tokens).  A final ``'.'`` is appended when the last token is not found in
    ``string.punctuation``, and every ``'#'`` in the output is replaced by
    ``'_POUND_'``.
    """
    try:
        toks = Tree.fromstring(problem[sent_prefix + '_parse']).leaves()
    except ValueError:
        sentence = problem.get(sent_prefix, None)
        toks = [] if sentence is None else sentence.split()

    needs_period = len(toks) > 0 and toks[-1] not in string.punctuation
    if needs_period:
        toks.append('.')

    joined = ' '.join(toks)
    print(joined.replace('#', '_POUND_'))
Example #17
Source File: util.py    From DPLP with MIT License 5 votes vote down vote up
def drawrst(strtree, fname):
    """ Draw RST tree into a file

    :param strtree: bracketed string representation of the tree, as
        accepted by ``Tree.fromstring``
    :param fname: name of the output file; ``".ps"`` is appended when the
        name does not already end with it
    """
    if not fname.endswith(".ps"):
        fname += ".ps"
    # Parse first, so that a malformed string never opens a canvas at all.
    t = Tree.fromstring(strtree)
    cf = CanvasFrame()
    try:
        tc = TreeWidget(cf.canvas(), t)
        cf.add_widget(tc, 10, 10)  # (10,10) offsets
        cf.print_to_file(fname)
    finally:
        # Always release the frame, even if drawing or printing fails.
        cf.destroy()
Example #18
Source File: wsj.py    From struct-learning-with-flow with MIT License 5 votes vote down vote up
def treebank_bracket_parse(t):
    """Parse a bracketed treebank string into a tree.

    The string is first given to ``Tree.fromstring`` with
    ``remove_empty_top_bracketing=True``.  If that raises ``IndexError``,
    the first and last characters of the stripped string are dropped and
    the remainder is handed to ``tree.bracket_parse``.
    """
    try:
        parsed = Tree.fromstring(t, remove_empty_top_bracketing=True)
    except IndexError:
        # in case it's the real treebank format,
        # strip first and last brackets before parsing
        inner = t.strip()[1:-1]
        parsed = tree.bracket_parse(inner)
    return parsed
Example #19
Source File: tree.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International 5 votes vote down vote up
def label(self):
        """
        Give back the label stored on this tree node.

            >>> t = Tree.fromstring('(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))')
            >>> t.label()
            'S'

        :return: the node label (typically a string)
        :rtype: any
        """
        node_label = self._label
        return node_label
Example #20
Source File: tree.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International 5 votes vote down vote up
def set_label(self, label):
        """
        Replace the label stored on this tree node.

            >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
            >>> t.set_label("T")
            >>> print(t)
            (T (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))

        :param label: the new node label (typically a string)
        :type label: any
        """
        new_label = label
        self._label = new_label
Example #21
Source File: evalb_bracketing_scorer_test.py    From magnitude with MIT License 5 votes vote down vote up
def test_evalb_with_terrible_trees_handles_nan_f1(self):
        # When precision and recall are both zero, evalb reports a nan f1.
        # Make sure the scorer copes with that zero division and reports 0.0.
        first_tree = Tree.fromstring(u"(PP (VROOT (PP That) (VROOT (PP could) "
                                     u"(VROOT (PP cost) (VROOT (PP him))))) (PP .))")
        second_tree = Tree.fromstring(u"(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")

        scorer = EvalbBracketingScorer()
        scorer([first_tree], [second_tree])
        metrics = scorer.get_metric()

        for metric_name in (u"evalb_recall", u"evalb_precision", u"evalb_f1_measure"):
            assert metrics[metric_name] == 0.0
Example #22
Source File: tree.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International 5 votes vote down vote up
def flatten(self):
        """
        Build a flat copy of the tree in which every non-root non-terminal is dropped.

            >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
            >>> print(t.flatten())
            (S the dog chased the cat)

        :return: a tree whose root carries this tree's label and whose
            children are this tree's leaves, with no intermediate nodes.
        :rtype: Tree
        """
        root_label = self.label()
        all_leaves = self.leaves()
        return Tree(root_label, all_leaves)
Example #23
Source File: tree.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International 5 votes vote down vote up
def height(self):
        """
        Compute the height of the tree.

            >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
            >>> t.height()
            5
            >>> print(t[0,0])
            (D the)
            >>> t[0,0].height()
            2

        :return: The height of this tree.  A tree without children has
            height 1; a tree whose children are all leaves has height 2;
            any other tree is one higher than its tallest child.
        :rtype: int
        """
        # A leaf counts as height 1; a subtree reports its own height.
        child_heights = [
            child.height() if isinstance(child, Tree) else 1
            for child in self
        ]
        # The leading 0 keeps the result at 1 for a tree with no children.
        return 1 + max([0] + child_heights)
Example #24
Source File: tree.py    From StageDP with MIT License 5 votes vote down vote up
def draw_rst(self, fname):
        """ Draw RST tree into a file

        The tree drawn is the one described by ``self.get_parse()``.

        :param fname: name of the output file; ``".ps"`` is appended when
            the name does not already end with it
        """
        tree_str = self.get_parse()
        if not fname.endswith(".ps"):
            fname += ".ps"
        # Parse first, so that a malformed string never opens a canvas at all.
        t = Tree.fromstring(tree_str)
        cf = CanvasFrame()
        try:
            tc = TreeWidget(cf.canvas(), t)
            cf.add_widget(tc, 10, 10)  # (10,10) offsets
            cf.print_to_file(fname)
        finally:
            # Always release the frame, even if drawing or printing fails.
            cf.destroy()
Example #25
Source File: tree.py    From razzy-spinner with GNU General Public License v3.0 5 votes vote down vote up
def set_label(self, label):
        """
        Assign a new node label to the tree.

            >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
            >>> t.set_label("T")
            >>> print(t)
            (T (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))

        :param label: the label to store on this node (typically a string)
        :type label: any
        """
        setattr(self, '_label', label)
Example #26
Source File: tree.py    From razzy-spinner with GNU General Public License v3.0 5 votes vote down vote up
def flatten(self):
        """
        Return a copy of the tree reduced to its root and its leaves.

            >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
            >>> print(t.flatten())
            (S the dog chased the cat)

        :return: a tree made of this tree's root label attached directly to
            its leaves; all intervening non-terminal nodes are left out.
        :rtype: Tree
        """
        label = self.label()
        flat_children = self.leaves()
        flat_tree = Tree(label, flat_children)
        return flat_tree
Example #27
Source File: tree.py    From razzy-spinner with GNU General Public License v3.0 5 votes vote down vote up
def height(self):
        """
        Report how tall the tree is.

            >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
            >>> t.height()
            5
            >>> print(t[0,0])
            (D the)
            >>> t[0,0].height()
            2

        :return: The height of this tree: 1 for a tree with no children,
            2 for a tree that contains only leaves, and otherwise one
            more than the greatest height among its children.
        :rtype: int
        """
        tallest = 0
        for node in self:
            # Leaves count as height 1; subtrees report their own height.
            node_height = node.height() if isinstance(node, Tree) else 1
            if node_height > tallest:
                tallest = node_height
        return tallest + 1
Example #28
Source File: tree.py    From razzy-spinner with GNU General Public License v3.0 5 votes vote down vote up
def sinica_parse(s):
    """
    Read a Sinica Treebank string and build the tree it describes.  Trees are
    written as nested bracketings, for example (X represents a Chinese character):
    S(goal:NP(Head:Nep:XX)|theme:NP(Head:Nhaa:X)|quantity:Dab:X|Head:VL2:X)#0(PERIODCATEGORY)

    :return: A tree corresponding to the string representation.
    :rtype: Tree
    :param s: The string to be converted
    :type s: str
    """
    pieces = re.split(r'([()| ])', s)
    for pos, piece in enumerate(pieces):
        if piece == '|':
            # Bars only separate siblings; they carry no content.
            pieces[pos] = ''
        elif piece == '(':
            # pull nonterminal inside parens
            pieces[pos], pieces[pos - 1] = pieces[pos - 1], piece
        elif ':' in piece:
            parts = piece.split(':')
            if len(parts) != 2:
                # terminal: keep the last two fields as a bracketed pair
                pieces[pos] = "(" + parts[-2] + " " + parts[-1] + ")"
            else:
                # non-terminal: drop the role prefix
                pieces[pos] = parts[1]

    bracketed = " ".join(pieces)
    return Tree.fromstring(bracketed, remove_empty_top_bracketing=True)

#    s = re.sub(r'^#[^\s]*\s', '', s)  # remove leading identifier
#    s = re.sub(r'\w+:', '', s)       # remove role tags

#    return s

######################################################################
## Demonstration
###################################################################### 
Example #29
Source File: classification.py    From fastNLP with Apache License 2.0 5 votes vote down vote up
def process(self, data_bundle: DataBundle):
        r"""
        Preprocess the data in a DataBundle. Every input DataSet must have at least a
        ``raw_words`` column whose content looks like the following.

        .. csv-table:: Field of a DataSet as read with SSTLoader
            :header: "raw_words"

            "(2 (3 (3 Effective) (2 but)) (1 (1 too-tepid)..."
            "(3 (3 (2 If) (3 (2 you) (3 (2 sometimes) ..."
            "..."

        :param ~fastNLP.io.DataBundle data_bundle: the DataBundle object to process
        :return: the result of the parent class's ``process`` on the rebuilt bundle
        """
        # First expand each parsed sentence into instances (optionally one per subtree).
        for name in list(data_bundle.datasets.keys()):
            source = data_bundle.get_dataset(name)
            rebuilt = DataSet()
            expand = self.subtree or (name == 'train' and self.train_tree)
            for ins in source:
                parsed = Tree.fromstring(ins[Const.RAW_WORD])
                # Either every subtree becomes an instance, or only the whole tree.
                nodes = parsed.subtrees() if expand else [parsed]
                for node in nodes:
                    text = " ".join(node.leaves())
                    rebuilt.append(Instance(raw_words=text, target=node.label()))
            data_bundle.set_dataset(rebuilt, name)

        # Set the tags according to the granularity.
        data_bundle = _granularize(data_bundle, tag_map=self.tag_map)

        return super().process(data_bundle)
Example #30
Source File: sstloader.py    From fastNLP with Apache License 2.0 5 votes vote down vote up
def _get_one(data, subtree):
        """
        Parse ``data`` as a bracketed tree and return ``(leaves, label)`` pairs.

        :param data: bracketed string accepted by ``Tree.fromstring``
        :param subtree: if true, one pair is produced for every subtree
            (the whole tree included); otherwise a single pair for the
            whole tree
        :return: a list of ``(leaves, label)`` tuples
        """
        tree = Tree.fromstring(data)
        nodes = tree.subtrees() if subtree else [tree]
        return [(node.leaves(), node.label()) for node in nodes]