Python nltk.tree.Tree.fromstring() Examples
The following are 30 code examples of nltk.tree.Tree.fromstring().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of nltk.tree.Tree, or try the search function.
Example #1
Source File: discourse_parsing.py From discourse-parsing with MIT License | 6 votes |
def initialize_edu_data(edus):
    '''
    Build one dictionary per EDU in the input list.

    Each EDU is a sequence of items whose first element is a word and whose
    second element is a POS tag. The returned list has one entry per EDU,
    holding its lowercased words, its POS tags, a one-leaf ``(text <index>)``
    tree, and its position (used as head/start/end index).
    '''
    edu_items = []
    # The position of the EDU doubles as the counter for distance features.
    for position, edu in enumerate(edus):
        lowered_words = [token[0].lower() for token in edu]
        pos_tags = [token[1] for token in edu]

        # Every EDU starts out as a tree with a single leaf: its index.
        leaf_tree = Tree.fromstring('(text)')
        leaf_tree.append('{}'.format(position))

        edu_items.append({
            "head_idx": position,
            "start_idx": position,
            "end_idx": position,
            "nt": "text",
            "head": lowered_words,
            "hpos": pos_tags,
            "tree": leaf_tree,
        })
    return edu_items
Example #2
Source File: test_oracle.py From pytorch-rnng with MIT License | 6 votes |
def test_from_tree(self):
    """DiscOracle.from_tree yields the expected actions, POS tags and words."""
    bracketed = '(S (NP (NNP John)) (VP (VBZ loves) (NP (NNP Mary))))'
    gold_actions = [
        NT('S'),
        NT('NP'), SHIFT, REDUCE,
        NT('VP'), SHIFT,
        NT('NP'), SHIFT, REDUCE,
        REDUCE,
        REDUCE,
    ]
    gold_pos_tags = ['NNP', 'VBZ', 'NNP']
    gold_words = ['John', 'loves', 'Mary']

    tree = Tree.fromstring(bracketed)
    oracle = DiscOracle.from_tree(tree)

    assert isinstance(oracle, DiscOracle)
    assert oracle.actions == gold_actions
    assert oracle.pos_tags == gold_pos_tags
    assert oracle.words == gold_words
Example #3
Source File: test_oracle.py From pytorch-rnng with MIT License | 6 votes |
def test_from_tree(self):
    """GenOracle.from_tree yields the expected actions, words and POS tags."""
    bracketed = '(S (NP (NNP John)) (VP (VBZ loves) (NP (NNP Mary))))'
    gold_actions = [
        NT('S'),
        NT('NP'), GEN('John'), REDUCE,
        NT('VP'), GEN('loves'),
        NT('NP'), GEN('Mary'), REDUCE,
        REDUCE,
        REDUCE,
    ]
    gold_words = ['John', 'loves', 'Mary']
    gold_pos_tags = ['NNP', 'VBZ', 'NNP']

    tree = Tree.fromstring(bracketed)
    oracle = GenOracle.from_tree(tree)

    assert isinstance(oracle, GenOracle)
    assert oracle.actions == gold_actions
    assert oracle.words == gold_words
    assert oracle.pos_tags == gold_pos_tags
Example #4
Source File: stanford_sentiment_tree_bank.py From magnitude with MIT License | 6 votes |
def _read(self, file_path):
    """
    Yield instances built from the bracketed trees in ``file_path``.

    Each non-empty line is parsed with ``Tree.fromstring``. When
    ``self._use_subtrees`` is true, one instance is attempted per subtree;
    otherwise only the whole tree is used. Results of ``text_to_instance``
    that are ``None`` are skipped.
    """
    with open(cached_path(file_path), u"r") as data_file:
        logger.info(u"Reading instances from lines in file at: %s", file_path)
        for raw_line in data_file.readlines():
            text = raw_line.strip(u"\n")
            if not text:
                continue
            parsed_line = Tree.fromstring(text)
            # Either every subtree or just the full tree, handled uniformly.
            if self._use_subtrees:
                candidates = parsed_line.subtrees()
            else:
                candidates = [parsed_line]
            for node in candidates:
                instance = self.text_to_instance(node.leaves(), node.label())
                if instance is not None:
                    yield instance
#overrides
Example #5
Source File: smt.py From PyTorch-NLP with BSD 3-Clause "New" or "Revised" License | 6 votes |
def parse_tree(data, subtrees=False, fine_grained=False):
    """
    Convert a bracketed tree string into text/label dictionaries.

    Args:
        data: Bracketed tree string accepted by ``Tree.fromstring``.
        subtrees: When true, return one dictionary per subtree.
        fine_grained: Forwarded to ``get_label_str``.

    Returns:
        A dict with ``'text'`` and ``'label'`` keys, or a list of such dicts
        when ``subtrees`` is true.

    Raises:
        ImportError: If NLTK is not installed.
    """
    # https://github.com/pytorch/text/blob/6476392a801f51794c90378dd23489578896c6f2/torchtext/data/example.py#L56
    try:
        from nltk.tree import Tree
    except ImportError:
        print("Please install NLTK. " "See the docs at http://nltk.org for more information.")
        raise

    def describe(node):
        return {
            'text': ' '.join(node.leaves()),
            'label': get_label_str(node.label(), fine_grained=fine_grained),
        }

    root = Tree.fromstring(data)
    if not subtrees:
        return describe(root)
    return [describe(node) for node in root.subtrees()]
Example #6
Source File: bracket_parse.py From razzy-spinner with GNU General Public License v3.0 | 6 votes |
def _parse(self, t):
    """
    Parse the bracketed string ``t`` into a Tree, recovering where possible.

    The normalized string is parsed with ``Tree.fromstring``. On a
    ``ValueError`` a message is written to stderr and, for mismatched
    parentheses, up to four closing parens are appended in turn. If that does
    not help, a flat ``S`` tree over ``self._tag(t)`` is returned.
    """
    try:
        return Tree.fromstring(self._normalize(t))
    except ValueError as e:
        sys.stderr.write("Bad tree detected; trying to recover...\n")
        # Try to recover, if we can:
        if e.args == ('mismatched parens',):
            for n in range(1, 5):
                try:
                    # Must go through fromstring: calling Tree() with only a
                    # string raises TypeError (it expects a label and a child
                    # list), which this handler would not catch.
                    v = Tree.fromstring(self._normalize(t+')'*n))
                except ValueError:
                    continue
                sys.stderr.write(" Recovered by adding %d close " "paren(s)\n" % n)
                return v
        # Try something else:
        sys.stderr.write(" Recovered by returning a flat parse.\n")
        #sys.stderr.write(' '.join(t.split())+'\n')
        return Tree('S', self._tag(t))
Example #7
Source File: bracket_parse.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International | 6 votes |
def _parse(self, t):
    """
    Parse the bracketed string ``t`` into a Tree, recovering where possible.

    The normalized string is parsed with ``Tree.fromstring``. On a
    ``ValueError`` a message is written to stderr and, for mismatched
    parentheses, up to four closing parens are appended in turn. If that does
    not help, a flat ``S`` tree over ``self._tag(t)`` is returned.
    """
    try:
        return Tree.fromstring(self._normalize(t))
    except ValueError as e:
        sys.stderr.write("Bad tree detected; trying to recover...\n")
        # Try to recover, if we can:
        if e.args == ('mismatched parens',):
            for n in range(1, 5):
                try:
                    # Must go through fromstring: calling Tree() with only a
                    # string raises TypeError (it expects a label and a child
                    # list), which this handler would not catch.
                    v = Tree.fromstring(self._normalize(t + ')' * n))
                except ValueError:
                    continue
                sys.stderr.write(
                    " Recovered by adding %d close " "paren(s)\n" % n
                )
                return v
        # Try something else:
        sys.stderr.write(" Recovered by returning a flat parse.\n")
        # sys.stderr.write(' '.join(t.split())+'\n')
        return Tree('S', self._tag(t))
Example #8
Source File: corenlp.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International | 5 votes |
def make_tree(self, result):
    """Parse the bracketed string stored under ``result['parse']`` into a Tree."""
    bracketed = result['parse']
    return Tree.fromstring(bracketed)
Example #9
Source File: stanford.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International | 5 votes |
def _make_tree(self, result):
    """Parse ``result`` with ``Tree.fromstring`` and return the tree."""
    tree = Tree.fromstring(result)
    return tree
Example #10
Source File: bllip.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International | 5 votes |
def _scored_parse_to_nltk_tree(scored_parse):
    """Convert ``scored_parse.ptb_parse`` to a Tree via its string form."""
    bracketed = str(scored_parse.ptb_parse)
    return Tree.fromstring(bracketed)
Example #11
Source File: treeprettyprinter.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International | 5 votes |
def test():
    """Do some tree drawing tests.

    Prints several treebank trees and one hand-written tree with integer
    leaves, each rendered through ``TreePrettyPrinter``.
    """

    def print_tree(n, tree, sentence=None, ansi=True, **xargs):
        # Print a header line (label `n` plus the sentence, or the tree's
        # leaves when no sentence is given), the raw tree, then the drawing.
        print()
        print('{0}: "{1}"'.format(n, ' '.join(sentence or tree.leaves())))
        print(tree)
        print()
        drawtree = TreePrettyPrinter(tree, sentence)
        try:
            print(drawtree.text(unicodelines=ansi, ansi=ansi, **xargs))
        except (UnicodeDecodeError, UnicodeEncodeError):
            # Retry with unicode line drawing and ANSI output switched off.
            print(drawtree.text(unicodelines=False, ansi=False, **xargs))

    from nltk.corpus import treebank

    for n in [0, 1440, 1591, 2771, 2170]:
        tree = treebank.parsed_sents()[n]
        print_tree(n, tree, nodedist=2, maxwidth=8)
    # NOTE(review): assumed to run once after the loop, so `tree` is the last
    # treebank tree printed above — confirm against the original layout.
    print()
    print('ASCII version:')
    print(TreePrettyPrinter(tree).text(nodedist=2))

    # Leaves are read with `int`; presumably they index into `sentence`
    # below — TODO confirm against TreePrettyPrinter.
    tree = Tree.fromstring(
        '(top (punct 8) (smain (noun 0) (verb 1) (inf (verb 5) (inf (verb 6) '
        '(conj (inf (pp (prep 2) (np (det 3) (noun 4))) (verb 7)) (inf (verb 9)) '
        '(vg 10) (inf (verb 11)))))) (punct 12))',
        read_leaf=int,
    )
    sentence = (
        'Ze had met haar moeder kunnen gaan winkelen ,'
        ' zwemmen of terrassen .'.split()
    )
    print_tree('Discontinuous tree', tree, sentence, nodedist=2)
Example #12
Source File: tree.py From DPLP with MIT License | 5 votes |
def parse(self):
    """
    Return the parse tree in string format.

    To visualize the result with nltk.tree:

        from nltk.tree import Tree
        t = Tree.fromstring(parse)
        t.draw()
    """
    return getparse(self.tree, "")
Example #13
Source File: parser.py From Lango with GNU General Public License v2.0 | 5 votes |
def _make_tree(self, result):
    """Parse ``result`` with ``Tree.fromstring`` and return the tree."""
    tree = Tree.fromstring(result)
    return tree
Example #14
Source File: data_helpers.py From acl2017-interactive_summarizer with Apache License 2.0 | 5 votes |
def get_parse_info(parsestr, stemmer, language, stoplist):
    """
    Extract token/POS information and chunked phrases from a parse string.

    Args:
        parsestr: Bracketed parse string accepted by ``Tree.fromstring``.
        stemmer: Object with a ``stem(word)`` method.
        language: ``'german'`` or ``'english'``; selects the chunk grammar.
        stoplist: Forwarded to ``get_terms``.

    Returns:
        A tuple ``(hash_token_pos, phrase_list)``: an ordered mapping from
        stemmed word to ``"word::pos"``, and the non-empty terms returned by
        ``get_terms`` joined with spaces.

    Raises:
        ValueError: If ``language`` is not supported. Previously this
            surfaced as an UnboundLocalError on ``grammar``.
    """
    if language == 'german':
        grammar = r"""
            NBAR:
                {<N.*|ADJ.*>*<N.*>}  # Nouns and Adjectives, terminated with Nouns
            VP:
                {<V.*>}  # terminated with Verbs
            NP:
                {<NBAR>}
                {<NBAR><APPR><NBAR>}  # Above, connected with in/of/etc...
        """
    elif language == 'english':
        # Taken from Su Nam Kim Paper...
        grammar = r"""
            NBAR:
                {<NN.*|JJ>*<NN.*>}  # Nouns and Adjectives, terminated with Nouns
            VP:
                {<V.*>}  # terminated with Verbs
            NP:
                {<NBAR>}
                {<NBAR><IN><NBAR>}  # Above, connected with in/of/etc...
        """
    else:
        raise ValueError(
            "Unsupported language {!r}; expected 'german' or 'english'".format(language)
        )

    hash_token_pos = OrderedDict()
    chunker = RegexpParser(grammar)
    postoks = []
    for subtree in Tree.fromstring(parsestr).subtrees():
        # Height 2 selects subtrees whose children are leaves (POS -> word).
        if subtree.height() == 2:
            word, pos = subtree[0], subtree.label()
            hash_token_pos[stemmer.stem(word)] = word + u"::" + pos
            postoks.append((word, pos))

    chunk_tree = chunker.parse(postoks)
    phrases = get_terms(chunk_tree, stemmer, stoplist)
    phrase_list = [' '.join(term) for term in phrases if term]
    return hash_token_pos, phrase_list
Example #15
Source File: example.py From pytorch-nlp with MIT License | 5 votes |
def fromtree(cls, data, fields, subtrees=False):
    """
    Build example(s) from a bracketed tree string.

    Each example is created by ``cls.fromlist`` from the space-joined leaves
    and the label of a tree node. With ``subtrees`` true, a list with one
    example per subtree is returned; otherwise a single example for the root.

    Raises:
        ImportError: If NLTK is not installed.
    """
    try:
        from nltk.tree import Tree
    except ImportError:
        print("Please install NLTK. " "See the docs at http://nltk.org for more information.")
        raise

    def build(node):
        return cls.fromlist([' '.join(node.leaves()), node.label()], fields)

    root = Tree.fromstring(data)
    if not subtrees:
        return build(root)
    return [build(node) for node in root.subtrees()]
Example #16
Source File: test_utils.py From pytorch-rnng with MIT License | 5 votes |
def test_add_dummy_pos():
    """add_dummy_pos wraps every bare leaf in an ``XX`` preterminal."""
    tree = Tree.fromstring('(S (NP John) (VP loves (NP Mary)))')
    rendered = str(add_dummy_pos(tree))
    assert rendered == '(S (NP (XX John)) (VP (XX loves) (NP (XX Mary))))'
Example #17
Source File: get_np_terminals.py From 4lang with MIT License | 5 votes |
def get_np_terminals():
    """
    Print every distinct ``tree.pos()`` entry found in the input file.

    Reads one bracketed tree per line from the file named by ``sys.argv[1]``,
    collects the unique entries returned by ``Tree.pos()``, and prints each
    through ``TEMPLATE`` with the second element first.
    """
    seen = set()
    with open(sys.argv[1]) as trees:
        for line in trees:
            # set.update de-duplicates, replacing the manual membership test.
            seen.update(Tree.fromstring(line).pos())
    #print(seen)
    for pair in seen:
        # print() as a function: the print statement is a SyntaxError on
        # Python 3, and a single-argument call behaves the same on Python 2.
        print(TEMPLATE.format(pair[1], pair[0]))
Example #18
Source File: pdtb2.py From Deep_Enhanced_Repr_for_IDRR with MIT License | 5 votes |
def __process_trees(self, s):
    """
    Split a ``|||``-delimited string of Penn parse trees into Tree objects.

    Returns an empty list for an empty/falsy input, otherwise a list with one
    NLTK Tree per delimited segment.
    """
    if not s:
        return []
    return [Tree.fromstring(tree_str) for tree_str in s.split("|||")]
######################################################################
Example #19
Source File: example.py From text with BSD 3-Clause "New" or "Revised" License | 5 votes |
def fromtree(cls, data, fields, subtrees=False):
    """
    Build example(s) from a bracketed tree string.

    Each example is created by ``cls.fromlist`` from the space-joined leaves
    and the label of a tree node. With ``subtrees`` true, a list with one
    example per subtree is returned; otherwise a single example for the root.

    Raises:
        ImportError: If NLTK is not installed.
    """
    try:
        from nltk.tree import Tree
    except ImportError:
        print("Please install NLTK. " "See the docs at http://nltk.org for more information.")
        raise

    def to_example(node):
        text = ' '.join(node.leaves())
        return cls.fromlist([text, node.label()], fields)

    parsed = Tree.fromstring(data)
    if subtrees:
        return [to_example(node) for node in parsed.subtrees()]
    return to_example(parsed)
Example #20
Source File: penn_tree_bank_reader_test.py From magnitude with MIT License | 5 votes |
def test_get_gold_spans_correctly_extracts_spans(self):
    """_get_gold_spans fills the span dict with the labelled constituents."""
    reader = PennTreeBankConstituencySpanDatasetReader()
    tree = Tree.fromstring(u"(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
    collected = {}
    reader._get_gold_spans(tree, 0, collected)  # pylint: disable=protected-access
    expected = [((0, 1), u'NP'), ((3, 4), u'NP'), ((2, 4), u'VP'), ((0, 4), u'S')]
    assert list(collected.items()) == expected
Example #21
Source File: stanford.py From razzy-spinner with GNU General Public License v3.0 | 5 votes |
def _make_tree(self, result):
    """Parse ``result`` with ``Tree.fromstring`` and return the tree."""
    tree = Tree.fromstring(result)
    return tree
Example #22
Source File: bllip.py From razzy-spinner with GNU General Public License v3.0 | 5 votes |
def _scored_parse_to_nltk_tree(scored_parse):
    """Convert ``scored_parse.ptb_parse`` to a Tree via its string form."""
    bracketed = str(scored_parse.ptb_parse)
    return Tree.fromstring(bracketed)
Example #23
Source File: example.py From decaNLP with BSD 3-Clause "New" or "Revised" License | 5 votes |
def fromtree(cls, data, fields, subtrees=False):
    """
    Build example(s) from a bracketed tree string.

    Each example is created by ``cls.fromlist`` from the space-joined leaves
    and the label of a tree node. With ``subtrees`` true, a list with one
    example per subtree is returned; otherwise a single example for the root.

    Raises:
        ImportError: If NLTK is not installed.
    """
    try:
        from nltk.tree import Tree
    except ImportError:
        print("Please install NLTK. " "See the docs at http://nltk.org for more information.")
        raise

    def make_example(node):
        return cls.fromlist([' '.join(node.leaves()), node.label()], fields)

    tree = Tree.fromstring(data)
    if not subtrees:
        return make_example(tree)
    return [make_example(sub) for sub in tree.subtrees()]
Example #24
Source File: pdtb2.py From pdtb2 with GNU General Public License v2.0 | 5 votes |
def __process_trees(self, s):
    """
    Split a ``|||``-delimited string of Penn parse trees into Tree objects.

    Returns an empty list for an empty/falsy input, otherwise a list with one
    NLTK Tree per delimited segment.
    """
    if not s:
        return []
    parsed_trees = []
    for segment in s.split("|||"):
        parsed_trees.append(Tree.fromstring(segment))
    return parsed_trees
######################################################################
Example #25
Source File: convert2graph.py From structured-neural-summarization with MIT License | 5 votes |
def parse_tree_to_sentence(parse_tree: str) -> List[str]:
    """Return the leaves of the tree parsed from the bracketed ``parse_tree``."""
    tree = Tree.fromstring(parse_tree)
    return tree.leaves()
Example #26
Source File: penn_tree_bank_reader_test.py From magnitude with MIT License | 5 votes |
def test_strip_functional_tags(self):
    """_strip_functional_tags reduces decorated labels to their plain form."""
    reader = PennTreeBankConstituencySpanDatasetReader()
    # Get gold spans should strip off all the functional tags.
    tree = Tree.fromstring(u"(S (NP=PRP (D the) (N dog)) (VP-0 (V chased) (NP|FUN-TAGS (D the) (N cat))))")
    reader._strip_functional_tags(tree)
    expected = Tree.fromstring(u"(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
    assert tree == expected
Example #27
Source File: format_tree.py From 4lang with MIT License | 5 votes |
def format_tree():
    """
    Print every tree in the file named by ``sys.argv[1]``.

    Each line is parsed with ``Tree.fromstring`` and passed to ``print_tree``,
    followed by a blank line.
    """
    with open(sys.argv[1]) as np_doc:
        for bracketed in np_doc:
            print_tree(Tree.fromstring(bracketed))
            print()
Example #28
Source File: preprocessing.py From camr with GNU General Public License v2.0 | 5 votes |
def _load_cparse(cparse_filename):
    '''
    Load constituent parse trees from a UTF-8 file, one tree per line.

    Returns a list of Tree objects, one per line of the file, each parsed
    from the stripped line.
    '''
    from nltk.tree import Tree
    with codecs.open(cparse_filename, 'r', encoding='utf-8') as cf:
        return [Tree.fromstring(row.strip()) for row in cf]
Example #29
Source File: fix_multi_sent.py From camr with GNU General Public License v2.0 | 5 votes |
def fix_multi_sent(line):
    """
    Wrap a multi-child tree under ``S1 -> S`` and return it on a single line.

    Args:
        line: Bracketed tree string accepted by ``Tree.fromstring``.

    Returns:
        The string form of the tree with every newline and the whitespace
        that follows it collapsed to one space. Trees with more than one
        top-level child are first re-rooted as ``(S1 (S <children>))``.
    """
    tree = Tree.fromstring(line)
    if len(tree) > 1:
        newtree = Tree('S1', [Tree('S', tree[:])])
    else:
        newtree = tree
    # Raw string: '\s' in a normal literal is an invalid escape sequence
    # (SyntaxWarning on recent Python); the regex itself is unchanged.
    return re.sub(r'\n\s*', ' ', str(newtree))
Example #30
Source File: sort_nps_depth.py From 4lang with MIT License | 5 votes |
def sort_nps():
    """
    Print the tree height followed by the original line for every tree in
    the file named by ``sys.argv[1]``.

    No extra newline is added after each line.
    """
    with open(sys.argv[1]) as np_doc:
        for line in np_doc:
            depth = Tree.fromstring(line).height()
            print(depth, line, end="")