Python nltk.Tree() Examples

The following code examples show how to use nltk.Tree(). They are taken from open-source Python projects. You can vote up the examples you like or vote down the ones you don't like.

Example 1
Project: controcurator   Author: ControCurator   File: summarization.py    MIT License 7 votes vote down vote up
def extract_nltk(comment):
    """Map named entities found in ``comment['text']`` to their chunk labels.

    The text is split into sentences; each sentence is tokenized, POS-tagged
    and passed through ``nltk.ne_chunk``.  Every top-level chunk whose type
    is exactly ``nltk.Tree`` contributes one entry: the key is the chunk's
    tokens concatenated without a separator, the value is the chunk label.
    A later chunk with the same key overwrites the earlier entry.

    The list of sentences is printed as a side effect.
    """
    sentence_list = nltk.sent_tokenize(comment['text'])
    found = {}
    print(sentence_list)
    for sent in sentence_list:
        chunked = nltk.ne_chunk(nltk.pos_tag(nltk.word_tokenize(sent)))
        for node in chunked:
            # Plain (word, tag) tuples are tokens outside any entity.
            if type(node) is not nltk.Tree:
                continue
            name = ''.join(leaf[0] for leaf in node.leaves())
            found[name] = node.label()
    return found
Example 2
Project: razzy-spinner   Author: rafasashi   File: regexp.py    GNU General Public License v3.0 6 votes vote down vote up
def parse(self, chunk_struct, trace=None):
        """
        Run every stage of this cascaded parser over the chunk structure.

        The stages in ``self._stages`` are applied in order and the whole
        sequence is repeated ``self._loop`` times; each stage receives the
        output of the previous one.

        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
        :type trace: int
        :param trace: the tracing level forwarded to every stage.  When
            ``None``, the level stored in ``self._trace`` is used instead.
        :return: the structure returned by the last stage (or
            ``chunk_struct`` itself if no stage ran).
        :rtype: Tree
        """
        level = self._trace if trace is None else trace
        result = chunk_struct
        for _ in range(self._loop):
            for stage in self._stages:
                result = stage.parse(result, trace=level)
        return result
Example 3
Project: OpenBottle   Author: xiaozhuchacha   File: regexp.py    MIT License 6 votes vote down vote up
def parse(self, chunk_struct, trace=None):
        """
        Run every stage of this cascaded parser over the chunk structure.

        The stages in ``self._stages`` are applied in order and the whole
        sequence is repeated ``self._loop`` times; each stage receives the
        output of the previous one.

        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
        :type trace: int
        :param trace: the tracing level forwarded to every stage.  When
            ``None``, the level stored in ``self._trace`` is used instead.
        :return: the structure returned by the last stage (or
            ``chunk_struct`` itself if no stage ran).
        :rtype: Tree
        """
        level = self._trace if trace is None else trace
        result = chunk_struct
        for _ in range(self._loop):
            for stage in self._stages:
                result = stage.parse(result, trace=level)
        return result
Example 4
Project: OpenBottle   Author: xiaozhuchacha   File: regexp.py    MIT License 6 votes vote down vote up
def parse(self, chunk_struct, trace=None):
        """
        Run every stage of this cascaded parser over the chunk structure.

        The stages in ``self._stages`` are applied in order and the whole
        sequence is repeated ``self._loop`` times; each stage receives the
        output of the previous one.

        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
        :type trace: int
        :param trace: the tracing level forwarded to every stage.  When
            ``None``, the level stored in ``self._trace`` is used instead.
        :return: the structure returned by the last stage (or
            ``chunk_struct`` itself if no stage ran).
        :rtype: Tree
        """
        level = self._trace if trace is None else trace
        result = chunk_struct
        for _ in range(self._loop):
            for stage in self._stages:
                result = stage.parse(result, trace=level)
        return result
Example 5
Project: Health-Checker   Author: KriAga   File: regexp.py    MIT License 6 votes vote down vote up
def parse(self, chunk_struct, trace=None):
        """
        Run every stage of this cascaded parser over the chunk structure.

        The stages in ``self._stages`` are applied in order and the whole
        sequence is repeated ``self._loop`` times; each stage receives the
        output of the previous one.

        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
        :type trace: int
        :param trace: the tracing level forwarded to every stage.  When
            ``None``, the level stored in ``self._trace`` is used instead.
        :return: the structure returned by the last stage (or
            ``chunk_struct`` itself if no stage ran).
        :rtype: Tree
        """
        level = self._trace if trace is None else trace
        result = chunk_struct
        for _ in range(self._loop):
            for stage in self._stages:
                result = stage.parse(result, trace=level)
        return result
Example 6
Project: gap   Author: sattree   File: utils.py    MIT License 6 votes vote down vote up
def get_edges_in_tree(parent, leaves=None, path='', edges=None, lrb_rrb_fix=False):
    """Collect the edges and leaves of a tree in depth-first order.

    Each child of ``parent`` yields one ``(from_node, to_node)`` edge whose
    ``from_node`` is ``path``.  For an ``nltk.Tree`` child the target name is
    ``'<path>-<label>-<child index>'`` and the child is then walked
    recursively with that name as its path.  For any other child (a leaf)
    the target name is ``'<leaf>-<number of leaves collected so far>'`` and
    the leaf is appended to ``leaves``.

    Args:
        parent: iterable of children (typically an ``nltk.Tree``).
        leaves: list to which leaves are appended; a fresh list is created
            when omitted.
        path: name of ``parent`` used as the source of its outgoing edges.
        edges: list to which edges are appended; a fresh list is created
            when omitted.
        lrb_rrb_fix: when true, a direct ``Tree`` child labelled ``-LRB-`` /
            ``-RRB-`` additionally appends ``'('`` / ``')'`` to ``leaves``.

    Returns:
        The tuple ``(edges, leaves)``.  When lists were passed in, the same
        (mutated) objects are returned.
    """
    # Fresh accumulators per call: the former ``[]`` defaults were shared
    # between calls, so results of earlier calls leaked into later ones.
    if leaves is None:
        leaves = []
    if edges is None:
        edges = []

    for i, node in enumerate(parent):
        if type(node) is nltk.Tree:
            from_node = path
            to_node = '{}-{}-{}'.format(path, node.label(), i)
            edges.append((from_node, to_node))

            if lrb_rrb_fix:
                if node.label() == '-LRB-':
                    leaves.append('(')
                if node.label() == '-RRB-':
                    leaves.append(')')

            # NOTE(review): lrb_rrb_fix is not forwarded, so it only affects
            # direct children of the outermost call -- confirm this is intended.
            edges, leaves = get_edges_in_tree(node, leaves, to_node, edges)
        else:
            from_node = path
            to_node = '{}-{}'.format(node, len(leaves))
            edges.append((from_node, to_node))
            leaves.append(node)
    return edges, leaves
Example 7
Project: self-attentive-parser   Author: nikitakit   File: nltk_plugin.py    MIT License 6 votes vote down vote up
def parse_sents(self, sents):
        """
        Lazily parse a collection of sentences.

        A string argument is first split into sentences with
        ``nltk.sent_tokenize`` using ``self._tokenizer_lang``; any other
        argument is handed on unchanged, one element per sentence.

        sents (str or Iterable[str] or Iterable[List[str]]): sentences to parse

        Raises: ValueError if "sents" is a string and no tokenizer language
        is configured.  Because this is a generator, the error surfaces on
        first iteration.

        Returns: Iter[nltk.Tree]
        """
        if isinstance(sents, STRING_TYPES):
            language = self._tokenizer_lang
            if language is None:
                raise ValueError(
                    "No tokenizer available for this language. "
                    "Please split into individual sentences and tokens "
                    "before calling the parser."
                    )
            sents = nltk.sent_tokenize(sents, language)

        prepared = self._nltk_process_sents(sents)
        for parse_raw, tags_raw, sentence in self._batched_parsed_raw(prepared):
            yield self._make_nltk_tree(sentence, tags_raw, *parse_raw)
Example 8
Project: controcurator   Author: ControCurator   File: test.py    MIT License 6 votes vote down vote up
def processLanguage():
    """Print the label and text of every named entity found in ``contentArray``.

    Each item of the module-level ``contentArray`` is tokenized, POS-tagged
    and chunked with ``nltk.ne_chunk``.  For every top-level chunk that is an
    ``nltk.Tree`` one line ``"<label> <space-joined tokens>"`` is printed.
    The function sleeps for one second after each item.

    Any exception stops the loop; its message is printed instead of being
    propagated.
    """
    # Indentation is spaces-only and the print / except forms are valid on
    # both Python 2.6+ and Python 3 (the original mixed tabs with spaces and
    # used Python-2-only syntax).
    try:
        for item in contentArray:
            tokenized = nltk.word_tokenize(item)
            tagged = nltk.pos_tag(tokenized)

            namedEnt = nltk.ne_chunk(tagged)
            for s in namedEnt:
                if type(s) is nltk.Tree:
                    t = ' '.join(c[0] for c in s.leaves())
                    print('%s %s' % (s.label(), t))
            time.sleep(1)

    except Exception as e:
        # Deliberate best-effort: report the failure and return.
        print(str(e))
Example 9
Project: FancyWord   Author: EastonLee   File: regexp.py    GNU General Public License v3.0 6 votes vote down vote up
def parse(self, chunk_struct, trace=None):
        """
        Run every stage of this cascaded parser over the chunk structure.

        The stages in ``self._stages`` are applied in order and the whole
        sequence is repeated ``self._loop`` times; each stage receives the
        output of the previous one.

        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
        :type trace: int
        :param trace: the tracing level forwarded to every stage.  When
            ``None``, the level stored in ``self._trace`` is used instead.
        :return: the structure returned by the last stage (or
            ``chunk_struct`` itself if no stage ran).
        :rtype: Tree
        """
        level = self._trace if trace is None else trace
        result = chunk_struct
        for _ in range(self._loop):
            for stage in self._stages:
                result = stage.parse(result, trace=level)
        return result
Example 10
Project: nltk-on-gae   Author: sivu22   File: regexp.py    Apache License 2.0 6 votes vote down vote up
def parse(self, chunk_struct, trace=None):
        """
        Run every stage of this cascaded parser over the chunk structure.

        The stages in ``self._stages`` are applied in order and the whole
        sequence is repeated ``self._loop`` times; each stage receives the
        output of the previous one.

        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
        :type trace: int
        :param trace: the tracing level forwarded to every stage.  When
            ``None``, the level stored in ``self._trace`` is used instead.
        :return: the structure returned by the last stage (or
            ``chunk_struct`` itself if no stage ran).
        :rtype: Tree
        """
        level = self._trace if trace is None else trace
        result = chunk_struct
        for _ in range(self._loop):
            for stage in self._stages:
                result = stage.parse(result, trace=level)
        return result
Example 11
Project: FAParser   Author: baoy-nlp   File: helper.py    GNU General Public License v3.0 6 votes vote down vote up
def process_NONE(tree):
    """Return a copy of ``tree`` with all ``-NONE-`` subtrees pruned.

    Anything that is not an ``nltk.Tree`` (a leaf) is returned unchanged.
    A tree labelled ``-NONE-`` is dropped, and so is any tree left without
    children after pruning; in both cases ``None`` is returned.  Otherwise a
    new ``nltk.Tree`` with the same label and the surviving children is
    built.
    """
    if not isinstance(tree, nltk.Tree):
        return tree

    label = tree.label()
    if label == '-NONE-':
        return None

    survivors = []
    for child in tree:
        pruned = process_NONE(child)
        if pruned is not None:
            survivors.append(pruned)
    return nltk.Tree(label, survivors) if survivors else None
Example 12
Project: FAParser   Author: baoy-nlp   File: helper.py    GNU General Public License v3.0 6 votes vote down vote up
def get_brackets(tree, start_idx=0, root=False):
    """Collect labelled spans ``(start, end, label)`` for a tree.

    A tree whose first child is not an ``nltk.Tree`` is treated as covering
    exactly one position and contributes no span of its own.  Any other tree
    covers the concatenated spans of its children and, unless ``root`` is
    true, adds its own span.  Occurrences of ``'ADVP'`` in a label are
    replaced by ``'PRT'``.

    Args:
        tree: the ``nltk.Tree`` to walk (checked with ``assert``).
        start_idx: position at which this tree's span starts.
        root: when true, the span of ``tree`` itself is not recorded.

    Returns:
        ``(brackets, end_idx)`` -- the set of spans and the position just
        past the last one covered by ``tree``.
    """
    assert isinstance(tree, nltk.Tree)
    label = tree.label().replace('ADVP', 'PRT')

    if not isinstance(tree[0], nltk.Tree):
        return set(), start_idx + 1

    spans = set()
    cursor = start_idx
    for child in tree:
        child_spans, cursor = get_brackets(child, cursor)
        spans |= child_spans
    if not root:
        spans.add((start_idx, cursor, label))
    return spans, cursor
Example 13
Project: FAParser   Author: baoy-nlp   File: distance_helper.py    GNU General Public License v3.0 6 votes vote down vote up
def process_NONE(tree):
    """Return a copy of ``tree`` with all ``-NONE-`` subtrees pruned.

    Anything that is not an ``nltk.Tree`` (a leaf) is returned unchanged.
    A tree labelled ``-NONE-`` is dropped, and so is any tree left without
    children after pruning; in both cases ``None`` is returned.  Otherwise a
    new ``nltk.Tree`` with the same label and the surviving children is
    built.
    """
    if not isinstance(tree, nltk.Tree):
        return tree

    label = tree.label()
    if label == '-NONE-':
        return None

    survivors = []
    for child in tree:
        pruned = process_NONE(child)
        if pruned is not None:
            survivors.append(pruned)
    return nltk.Tree(label, survivors) if survivors else None
Example 14
Project: FAParser   Author: baoy-nlp   File: distance_helper.py    GNU General Public License v3.0 6 votes vote down vote up
def get_brackets(tree, start_idx=0, root=False):
    """Collect labelled spans ``(start, end, label)`` for a tree.

    A tree whose first child is not an ``nltk.Tree`` is treated as covering
    exactly one position and contributes no span of its own.  Any other tree
    covers the concatenated spans of its children and, unless ``root`` is
    true, adds its own span.  Occurrences of ``'ADVP'`` in a label are
    replaced by ``'PRT'``.

    Args:
        tree: the ``nltk.Tree`` to walk (checked with ``assert``).
        start_idx: position at which this tree's span starts.
        root: when true, the span of ``tree`` itself is not recorded.

    Returns:
        ``(brackets, end_idx)`` -- the set of spans and the position just
        past the last one covered by ``tree``.
    """
    assert isinstance(tree, nltk.Tree)
    label = tree.label().replace('ADVP', 'PRT')

    if not isinstance(tree[0], nltk.Tree):
        return set(), start_idx + 1

    spans = set()
    cursor = start_idx
    for child in tree:
        child_spans, cursor = get_brackets(child, cursor)
        spans |= child_spans
    if not root:
        spans.add((start_idx, cursor, label))
    return spans, cursor
Example 15
Project: luscan-devel   Author: blackye   File: regexp.py    GNU General Public License v2.0 6 votes vote down vote up
def parse(self, chunk_struct, trace=None):
        """
        Run every stage of this cascaded parser over the chunk structure.

        The stages in ``self._stages`` are applied in order and the whole
        sequence is repeated ``self._loop`` times; each stage receives the
        output of the previous one.

        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
        :type trace: int
        :param trace: the tracing level forwarded to every stage.  When
            ``None``, the level stored in ``self._trace`` is used instead.
        :return: the structure returned by the last stage (or
            ``chunk_struct`` itself if no stage ran).
        :rtype: Tree
        """
        level = self._trace if trace is None else trace
        result = chunk_struct
        for _ in range(self._loop):
            for stage in self._stages:
                result = stage.parse(result, trace=level)
        return result
Example 16
Project: extract_phrase   Author: yardstick17   File: chunker.py    MIT License 6 votes vote down vote up
def extract_rule_and_chunk(self, chunked_tree: nltk.Tree) -> dict:
        """Group the leaves of each chunk by the chunk's label.

        Every ``nltk.Tree`` that is a direct child of ``chunked_tree`` is
        recorded, and so is every ``nltk.Tree`` directly inside such a
        child.  For each recorded chunk, all of its leaves (collected
        depth-first, descending at most 100 levels) are appended as one list
        under the chunk's label.

        :param chunked_tree: the chunked sentence to inspect.
        :return: a ``defaultdict(list)`` mapping chunk label to a list of
            leaf lists, one per recorded chunk.
        """
        def gather_leaves(tree, leaves, levels_left=100):
            # Stop descending once the depth budget is used up.
            if levels_left <= 0:
                return leaves
            for child in tree:
                if isinstance(child, nltk.Tree):
                    gather_leaves(child, leaves, levels_left - 1)
                else:
                    leaves.append(child)
            return leaves

        def record(chunk):
            chunk_leaves = gather_leaves(chunk, [])
            grouped[chunk.label()].append(chunk_leaves)

        grouped = nltk.defaultdict(list)
        for chunk in chunked_tree:
            if not isinstance(chunk, nltk.Tree):
                continue
            record(chunk)
            for inner in chunk:
                if isinstance(inner, nltk.Tree):
                    record(inner)
        return grouped
Example 17
Project: tetre   Author: aoldoni   File: tree_utils.py    MIT License 6 votes vote down vote up
def to_nltk_tree_general(node, attr_list=("dep_", "pos_"), level=99999):
    """Transform a SpaCy dependency subtree into an NLTK tree.

    The label of every node is built by joining the values of the requested
    SpaCy attributes with ``"/"``.

    Args:
        node: The SpaCy node at which the transformation starts.
        attr_list: Names of the node attributes that make up the label.
        level: How many more levels may be expanded below this node; at
            ``0`` the node is returned as a bare label.

    Returns:
        An NLTK ``Tree`` when the node has children and ``level`` is not 0,
        otherwise the label string of the node.
    """
    label = "/".join(getattr(node, name) for name in attr_list)

    # Depth budget exhausted, or nothing below this node: emit a leaf.
    if level == 0 or node.n_lefts + node.n_rights <= 0:
        return label

    subtrees = [to_nltk_tree_general(child, attr_list, level - 1) for child in node.children]
    return Tree(label, subtrees)
Example 18
Project: tetre   Author: aoldoni   File: tree_utils.py    MIT License 6 votes vote down vote up
def get_node_representation(tetre_format, token):
    """Build a one-level NLTK tree for a SpaCy token.

    The root is labelled with the token's ``pos_``.  Each child of the token
    becomes a leaf produced by ``to_nltk_tree_general`` with ``level=0``,
    using the attributes named in ``tetre_format``.

    Args:
        tetre_format: Comma-separated names of the child attributes that make
            up each child's string representation.
        token: The SpaCy node itself (spacy.token).

    Returns:
        A NLTK Tree (nltk.tree); it has no children when the token has none.
    """
    child_attrs = tetre_format.split(",")
    root_label = token.pos_

    if token.n_lefts + token.n_rights > 0:
        leaves = [to_nltk_tree_general(child, attr_list=child_attrs, level=0)
                  for child in token.children]
    else:
        leaves = []

    return Tree(root_label, leaves)
Example 19
Project: tetre   Author: aoldoni   File: tree_utils.py    MIT License 6 votes vote down vote up
def nltk_tree_to_qtree(tree):
    """Render an NLTK Tree as a QTREE string (a bracketed text form of a tree).

    For details, please see: http://www.ling.upenn.edu/advice/latex/qtree/qtreenotes.pdf

    A ``Tree`` is rendered as its label followed by the renderings of its
    children, taken in ``sorted`` order; anything else is rendered via
    ``str``.  Every node is wrapped in `` [ `` ... `` ] ``.

    Args:
        tree: The NLTK Tree (nltk.tree), or a leaf value.

    Returns:
        A string with the QTREE representation of the NLTK Tree (nltk.tree).
    """
    if isinstance(tree, Tree):
        inner = " " + tree.label() + " "
        if len(tree) > 0:
            rendered_children = [nltk_tree_to_qtree(child) for child in sorted(tree)]
            inner += " ".join(rendered_children)
    else:
        inner = " " + str(tree) + " "

    return " [ " + inner + " ] "
Example 20
Project: allennlp-semparse   Author: allenai   File: domain_language.py    Apache License 2.0 6 votes vote down vote up
def nltk_tree_to_logical_form(tree: Tree) -> str:
    """
    Convert an ``nltk.Tree`` describing the derivation of a logical form into the (lisp-like)
    logical form itself, turning non-terminal symbols into the matching parentheses.

    A tree without children yields its own label, a tree with exactly one child yields that
    child's label, and any other tree yields its children's logical forms joined by spaces and
    wrapped in parentheses.

    This is used in the logic that converts action sequences back into logical forms.  It's very
    unlikely that you will need this anywhere else.
    """
    # nltk.Tree is a `list` subclass, so `len()` is the number of children.  The count is tested
    # explicitly rather than via truthiness so the result does not hinge on how nltk defines
    # `bool(tree)`.
    child_count = len(tree)
    if child_count == 0:
        return tree.label()
    if child_count == 1:
        return tree[0].label()
    rendered = [nltk_tree_to_logical_form(child) for child in tree]
    return f"({' '.join(rendered)})"
Example 21
Project: honours_project   Author: JFriel   File: regexp.py    GNU General Public License v3.0 6 votes vote down vote up
def parse(self, chunk_struct, trace=None):
        """
        Run every stage of this cascaded parser over the chunk structure.

        The stages in ``self._stages`` are applied in order and the whole
        sequence is repeated ``self._loop`` times; each stage receives the
        output of the previous one.

        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
        :type trace: int
        :param trace: the tracing level forwarded to every stage.  When
            ``None``, the level stored in ``self._trace`` is used instead.
        :return: the structure returned by the last stage (or
            ``chunk_struct`` itself if no stage ran).
        :rtype: Tree
        """
        level = self._trace if trace is None else trace
        result = chunk_struct
        for _ in range(self._loop):
            for stage in self._stages:
                result = stage.parse(result, trace=level)
        return result
Example 22
Project: pcfg-sampling   Author: wilkeraziz   File: parse.py    Apache License 2.0 5 votes vote down vote up
def make_nltk_tree(derivation):
    """
    Recursively construct an nltk Tree from a list of rules.

    The tree is rooted at the left-hand side of the first rule.  A
    right-hand-side symbol that is itself the left-hand side of some rule
    is expanded into a subtree; any other symbol becomes a string leaf.
    When several rules share a left-hand side, the last one is used.

    @param derivation: sequence of rules exposing ``lhs`` and ``rhs``
    """
    rule_for = {rule.lhs: rule for rule in derivation}

    def expand(symbol):
        rule = rule_for[symbol]
        return Tree(str(rule.lhs),
                    (expand(child) if child in rule_for else str(child) for child in rule.rhs))

    return expand(derivation[0].lhs)
Example 23
Project: pcfg-sampling   Author: wilkeraziz   File: mcmcparse.py    Apache License 2.0 5 votes vote down vote up
def make_nltk_tree(derivation):
    """
    Recursively construct an nltk Tree from a list of rules.

    The tree is rooted at the left-hand side of the first rule.  A
    right-hand-side symbol that is itself the left-hand side of some rule
    is expanded into a subtree; any other symbol becomes a string leaf.
    When several rules share a left-hand side, the last one is used.

    @param derivation: sequence of rules exposing ``lhs`` and ``rhs``
    """
    rule_for = {rule.lhs: rule for rule in derivation}

    def expand(symbol):
        rule = rule_for[symbol]
        return Tree(str(rule.lhs),
                    (expand(child) if child in rule_for else str(child) for child in rule.rhs))

    return expand(derivation[0].lhs)
Example 24
Project: razzy-spinner   Author: rafasashi   File: regexp.py    GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, chunk_struct, debug_level=1):
        """
        Build a ``ChunkString`` encoding the chunking of ``chunk_struct``.

        The root label and a shallow copy of the top-level pieces are
        stored, and the string form is initialised to the tag of every
        piece wrapped in angle brackets (``<T1><T2>...``).

        :type chunk_struct: Tree
        :param chunk_struct: The chunk structure to be further chunked.
        :type debug_level: int
        :param debug_level: The level of debugging which should be
            applied to transformations on the ``ChunkString``.  The
            valid levels are:
                - 0: no checks
                - 1: full check on to_chunkstruct
                - 2: full check on to_chunkstruct and cursory check after
                   each transformation.
                - 3: full check on to_chunkstruct and full check after
                   each transformation.
            Level 1 or higher is recommended; level 3 is advisable with
            non-standard subclasses of ``RegexpChunkRule``.
        """
        self._root_label = chunk_struct.label()
        self._pieces = chunk_struct[:]
        joined_tags = '><'.join(self._tag(piece) for piece in self._pieces)
        self._str = '<%s>' % joined_tags
        self._debug = debug_level
Example 25
Project: razzy-spinner   Author: rafasashi   File: regexp.py    GNU General Public License v3.0 5 votes vote down vote up
def _tag(self, tok):
        """
        Return the tag used for ``tok`` in the chunk string: the second
        element of a tuple token, or the label of a ``Tree``.

        :raise ValueError: if ``tok`` is neither a tuple nor a ``Tree``.
        """
        if isinstance(tok, tuple):
            return tok[1]
        if isinstance(tok, Tree):
            return tok.label()
        raise ValueError('chunk structures must contain tagged '
                         'tokens or trees')
Example 26
Project: razzy-spinner   Author: rafasashi   File: regexp.py    GNU General Public License v3.0 5 votes vote down vote up
def to_chunkstruct(self, chunk_label='CHUNK'):
        """
        Rebuild the chunk structure encoded by this ``ChunkString``.

        The string is split at every brace; the segments alternate between
        text outside a chunk (even positions) and text inside a chunk (odd
        positions).  The number of ``<`` in a segment tells how many of the
        stored pieces it covers.

        :param chunk_label: label given to every chunk subtree.
        :rtype: Tree
        :raise ValueError: If a transformation has generated an
            invalid chunkstring (checked via ``self._verify`` when the
            debug level is above 0).
        """
        if self._debug > 0: self._verify(self._str, 1)

        rebuilt = []
        cursor = 0
        for position, segment in enumerate(re.split('[{}]', self._str)):
            # Tokens covered by this segment.
            span = segment.count('<')
            tokens = self._pieces[cursor:cursor + span]

            if position % 2:
                # Odd segments sit between an opening and a closing brace.
                rebuilt.append(Tree(chunk_label, tokens))
            else:
                rebuilt.extend(tokens)

            cursor += span

        return Tree(self._root_label, rebuilt)
Example 27
Project: razzy-spinner   Author: rafasashi   File: regexp.py    GNU General Public License v3.0 5 votes vote down vote up
def parse(self, chunk_struct, trace=None):
        """
        Chunk a tagged sentence with this parser's rules.

        Empty input prints a warning and yields an empty tree.  Input
        without a usable ``label()`` is first wrapped in a ``Tree`` with
        this parser's root label.

        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
        :type trace: int
        :param trace: The level of tracing that should be used when
            parsing a text.  A false value selects the non-tracing rule
            application; a value above ``1`` requests verbose tracing.
            ``None`` means "use the level stored in ``self._trace``".
        :rtype: Tree
        :return: a chunk structure that encodes the chunks in a given
            tagged sentence, as produced by
            ``ChunkString.to_chunkstruct`` with this parser's chunk label.
        """
        if len(chunk_struct) == 0:
            print('Warning: parsing empty text')
            return Tree(self._root_label, [])

        # Accept bare token sequences: wrap anything lacking label().
        try:
            chunk_struct.label()
        except AttributeError:
            chunk_struct = Tree(self._root_label, chunk_struct)

        level = self._trace if trace is None else trace
        encoded = ChunkString(chunk_struct)

        # Apply the rules, with or without tracing output.
        if level:
            self._trace_apply(encoded, level > 1)
        else:
            self._notrace_apply(encoded)

        return encoded.to_chunkstruct(self._chunk_label)
Example 28
Project: OpenBottle   Author: xiaozhuchacha   File: regexp.py    MIT License 5 votes vote down vote up
def __init__(self, chunk_struct, debug_level=1):
        """
        Build a ``ChunkString`` encoding the chunking of ``chunk_struct``.

        The root label and a shallow copy of the top-level pieces are
        stored, and the string form is initialised to the tag of every
        piece wrapped in angle brackets (``<T1><T2>...``).

        :type chunk_struct: Tree
        :param chunk_struct: The chunk structure to be further chunked.
        :type debug_level: int
        :param debug_level: The level of debugging which should be
            applied to transformations on the ``ChunkString``.  The
            valid levels are:
                - 0: no checks
                - 1: full check on to_chunkstruct
                - 2: full check on to_chunkstruct and cursory check after
                   each transformation.
                - 3: full check on to_chunkstruct and full check after
                   each transformation.
            Level 1 or higher is recommended; level 3 is advisable with
            non-standard subclasses of ``RegexpChunkRule``.
        """
        self._root_label = chunk_struct.label()
        self._pieces = chunk_struct[:]
        joined_tags = '><'.join(self._tag(piece) for piece in self._pieces)
        self._str = '<%s>' % joined_tags
        self._debug = debug_level
Example 29
Project: OpenBottle   Author: xiaozhuchacha   File: regexp.py    MIT License 5 votes vote down vote up
def _tag(self, tok):
        """
        Return the tag used for ``tok`` in the chunk string: the second
        element of a tuple token, or the label of a ``Tree``.

        :raise ValueError: if ``tok`` is neither a tuple nor a ``Tree``.
        """
        if isinstance(tok, tuple):
            return tok[1]
        if isinstance(tok, Tree):
            return tok.label()
        raise ValueError('chunk structures must contain tagged '
                         'tokens or trees')
Example 30
Project: OpenBottle   Author: xiaozhuchacha   File: regexp.py    MIT License 5 votes vote down vote up
def to_chunkstruct(self, chunk_label='CHUNK'):
        """
        Rebuild the chunk structure encoded by this ``ChunkString``.

        The string is split at every brace; the segments alternate between
        text outside a chunk (even positions) and text inside a chunk (odd
        positions).  The number of ``<`` in a segment tells how many of the
        stored pieces it covers.

        :param chunk_label: label given to every chunk subtree.
        :rtype: Tree
        :raise ValueError: If a transformation has generated an
            invalid chunkstring (checked via ``self._verify`` when the
            debug level is above 0).
        """
        if self._debug > 0: self._verify(self._str, 1)

        rebuilt = []
        cursor = 0
        for position, segment in enumerate(re.split('[{}]', self._str)):
            # Tokens covered by this segment.
            span = segment.count('<')
            tokens = self._pieces[cursor:cursor + span]

            if position % 2:
                # Odd segments sit between an opening and a closing brace.
                rebuilt.append(Tree(chunk_label, tokens))
            else:
                rebuilt.extend(tokens)

            cursor += span

        return Tree(self._root_label, rebuilt)
Example 31
Project: OpenBottle   Author: xiaozhuchacha   File: regexp.py    MIT License 5 votes vote down vote up
def parse(self, chunk_struct, trace=None):
        """
        Chunk a tagged sentence with this parser's rules.

        Empty input prints a warning and yields an empty tree.  Input
        without a usable ``label()`` is first wrapped in a ``Tree`` with
        this parser's root label.

        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
        :type trace: int
        :param trace: The level of tracing that should be used when
            parsing a text.  A false value selects the non-tracing rule
            application; a value above ``1`` requests verbose tracing.
            ``None`` means "use the level stored in ``self._trace``".
        :rtype: Tree
        :return: a chunk structure that encodes the chunks in a given
            tagged sentence, as produced by
            ``ChunkString.to_chunkstruct`` with this parser's chunk label.
        """
        if len(chunk_struct) == 0:
            print('Warning: parsing empty text')
            return Tree(self._root_label, [])

        # Accept bare token sequences: wrap anything lacking label().
        try:
            chunk_struct.label()
        except AttributeError:
            chunk_struct = Tree(self._root_label, chunk_struct)

        level = self._trace if trace is None else trace
        encoded = ChunkString(chunk_struct)

        # Apply the rules, with or without tracing output.
        if level:
            self._trace_apply(encoded, level > 1)
        else:
            self._notrace_apply(encoded)

        return encoded.to_chunkstruct(self._chunk_label)
Example 32
Project: OpenBottle   Author: xiaozhuchacha   File: regexp.py    MIT License 5 votes vote down vote up
def __init__(self, chunk_struct, debug_level=1):
        """
        Build a ``ChunkString`` encoding the chunking of ``chunk_struct``.

        The root label and a shallow copy of the top-level pieces are
        stored, and the string form is initialised to the tag of every
        piece wrapped in angle brackets (``<T1><T2>...``).

        :type chunk_struct: Tree
        :param chunk_struct: The chunk structure to be further chunked.
        :type debug_level: int
        :param debug_level: The level of debugging which should be
            applied to transformations on the ``ChunkString``.  The
            valid levels are:
                - 0: no checks
                - 1: full check on to_chunkstruct
                - 2: full check on to_chunkstruct and cursory check after
                   each transformation.
                - 3: full check on to_chunkstruct and full check after
                   each transformation.
            Level 1 or higher is recommended; level 3 is advisable with
            non-standard subclasses of ``RegexpChunkRule``.
        """
        self._root_label = chunk_struct.label()
        self._pieces = chunk_struct[:]
        joined_tags = '><'.join(self._tag(piece) for piece in self._pieces)
        self._str = '<%s>' % joined_tags
        self._debug = debug_level
Example 33
Project: OpenBottle   Author: xiaozhuchacha   File: regexp.py    MIT License 5 votes vote down vote up
def _tag(self, tok):
        """
        Return the tag used for ``tok`` in the chunk string: the second
        element of a tuple token, or the label of a ``Tree``.

        :raise ValueError: if ``tok`` is neither a tuple nor a ``Tree``.
        """
        if isinstance(tok, tuple):
            return tok[1]
        if isinstance(tok, Tree):
            return tok.label()
        raise ValueError('chunk structures must contain tagged '
                         'tokens or trees')
Example 34
Project: OpenBottle   Author: xiaozhuchacha   File: regexp.py    MIT License 5 votes vote down vote up
def to_chunkstruct(self, chunk_label='CHUNK'):
        """
        Return the chunk structure encoded by this ``ChunkString``.

        :param chunk_label: label given to each chunk subtree
        :rtype: Tree
        :raise ValueError: If a transformation has generated an
            invalid chunkstring.
        """
        if self._debug > 0:
            self._verify(self._str, 1)

        # Splitting on braces yields segments that alternate between
        # "outside a chunk" (even positions) and "inside a chunk" (odd).
        children = []
        start = 0
        for position, segment in enumerate(re.split('[{}]', self._str)):
            # Each '<' in the segment corresponds to one stored piece.
            stop = start + segment.count('<')
            tokens = self._pieces[start:stop]

            if position % 2:
                children.append(Tree(chunk_label, tokens))
            else:
                children.extend(tokens)

            start = stop

        return Tree(self._root_label, children)
Example 35
Project: OpenBottle   Author: xiaozhuchacha   File: regexp.py    MIT License 5 votes vote down vote up
def parse(self, chunk_struct, trace=None):
        """
        Chunk ``chunk_struct`` by applying this parser's rules.

        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
        :type trace: int
        :param trace: The tracing level used while parsing: ``0``
            produces no tracing output, ``1`` normal tracing output,
            and ``2`` or higher verbose tracing output.  When given,
            it overrides the trace level passed to the constructor.
        :rtype: Tree
        :return: a chunk structure that encodes the chunks in a given
            tagged sentence.  A chunk is a non-overlapping linguistic
            group, such as a noun phrase.  Which chunks are identified
            depends on the rules used to define this
            ``RegexpChunkParser``.
        """
        if not len(chunk_struct):
            print('Warning: parsing empty text')
            return Tree(self._root_label, [])

        # Input that has no ``label()`` method is wrapped in a Tree
        # under the parser's root label.
        try:
            chunk_struct.label()
        except AttributeError:
            chunk_struct = Tree(self._root_label, chunk_struct)

        # Fall back to the parser-wide trace level when none is given.
        effective_trace = self._trace if trace is None else trace

        chunkstr = ChunkString(chunk_struct)

        # Run every rule over the chunkstring, with or without tracing.
        if effective_trace:
            self._trace_apply(chunkstr, effective_trace > 1)
        else:
            self._notrace_apply(chunkstr)

        # Convert the transformed chunkstring back into a tree.
        return chunkstr.to_chunkstruct(self._chunk_label)
Example 36
Project: Health-Checker   Author: KriAga   File: regexp.py    MIT License 5 votes vote down vote up
def __init__(self, chunk_struct, debug_level=1):
        """
        Build a ``ChunkString`` that encodes the chunking of the
        top-level pieces of ``chunk_struct``.

        The root label and a slice copy of the children are stored, and
        the children's tags are rendered as ``<tag1><tag2>...`` in
        ``self._str``.

        :type chunk_struct: Tree
        :param chunk_struct: The chunk structure to be further chunked.
        :type debug_level: int
        :param debug_level: How much checking is applied to
            transformations on the ``ChunkString``:
                - 0: no checks
                - 1: full check on to_chunkstruct
                - 2: full check on to_chunkstruct and cursory check
                   after each transformation.
                - 3: full check on to_chunkstruct and full check
                   after each transformation.
            At least level 1 is recommended; level 3 is advisable when
            non-standard subclasses of ``RegexpChunkRule`` are used.
        """
        self._root_label = chunk_struct.label()
        self._pieces = chunk_struct[:]
        # One tag per top-level piece, each wrapped in angle brackets.
        joined_tags = '><'.join(self._tag(piece) for piece in self._pieces)
        self._str = '<%s>' % joined_tags
        self._debug = debug_level
Example 37
Project: Health-Checker   Author: KriAga   File: regexp.py    MIT License 5 votes vote down vote up
def _tag(self, tok):
        """
        Return the tag that stands for ``tok`` in the chunk string.

        :param tok: a tagged token (tuple) or a ``Tree``
        :return: the second element of a tuple, or the label of a tree
        :raise ValueError: if ``tok`` is neither a tuple nor a ``Tree``
        """
        if isinstance(tok, tuple):
            # Tagged token: the tag is the second element.
            return tok[1]
        if isinstance(tok, Tree):
            return tok.label()
        raise ValueError('chunk structures must contain tagged '
                         'tokens or trees')
Example 38
Project: Health-Checker   Author: KriAga   File: regexp.py    MIT License 5 votes vote down vote up
def to_chunkstruct(self, chunk_label='CHUNK'):
        """
        Return the chunk structure encoded by this ``ChunkString``.

        :param chunk_label: label given to each chunk subtree
        :rtype: Tree
        :raise ValueError: If a transformation has generated an
            invalid chunkstring.
        """
        if self._debug > 0:
            self._verify(self._str, 1)

        # Splitting on braces yields segments that alternate between
        # "outside a chunk" (even positions) and "inside a chunk" (odd).
        children = []
        start = 0
        for position, segment in enumerate(re.split('[{}]', self._str)):
            # Each '<' in the segment corresponds to one stored piece.
            stop = start + segment.count('<')
            tokens = self._pieces[start:stop]

            if position % 2:
                children.append(Tree(chunk_label, tokens))
            else:
                children.extend(tokens)

            start = stop

        return Tree(self._root_label, children)
Example 39
Project: Health-Checker   Author: KriAga   File: regexp.py    MIT License 5 votes vote down vote up
def parse(self, chunk_struct, trace=None):
        """
        Chunk ``chunk_struct`` by applying this parser's rules.

        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
        :type trace: int
        :param trace: The tracing level used while parsing: ``0``
            produces no tracing output, ``1`` normal tracing output,
            and ``2`` or higher verbose tracing output.  When given,
            it overrides the trace level passed to the constructor.
        :rtype: Tree
        :return: a chunk structure that encodes the chunks in a given
            tagged sentence.  A chunk is a non-overlapping linguistic
            group, such as a noun phrase.  Which chunks are identified
            depends on the rules used to define this
            ``RegexpChunkParser``.
        """
        if not len(chunk_struct):
            print('Warning: parsing empty text')
            return Tree(self._root_label, [])

        # Input that has no ``label()`` method is wrapped in a Tree
        # under the parser's root label.
        try:
            chunk_struct.label()
        except AttributeError:
            chunk_struct = Tree(self._root_label, chunk_struct)

        # Fall back to the parser-wide trace level when none is given.
        effective_trace = self._trace if trace is None else trace

        chunkstr = ChunkString(chunk_struct)

        # Run every rule over the chunkstring, with or without tracing.
        if effective_trace:
            self._trace_apply(chunkstr, effective_trace > 1)
        else:
            self._notrace_apply(chunkstr)

        # Convert the transformed chunkstring back into a tree.
        return chunkstr.to_chunkstruct(self._chunk_label)
Example 40
Project: self-attentive-parser   Author: nikitakit   File: nltk_plugin.py    MIT License 5 votes vote down vote up
def _make_nltk_tree(self, sentence, tags, score, p_i, p_j, p_label):
        """
        Rebuild a ``Tree`` from flat span arrays and attach ``score``.

        ``p_i``, ``p_j`` and ``p_label`` are read in lockstep, one entry
        per node, and are assumed to follow a preorder traversal (the
        decoder produces only scores and span indices, not trees).

        :param sentence: the words, or ``(word, tag)`` pairs when
            ``self._provides_tags`` is false
        :param tags: tag indices into ``self._tag_vocab``; only read
            when ``self._provides_tags`` is true
        :param score: value stored on the returned tree as ``.score``
        :param p_i: span start index per node
        :param p_j: span end index per node
        :param p_label: index into ``self._label_vocab`` per node
        :return: the first tree produced for the root span
        """
        # Python 2 has no "nonlocal", so the running node index lives
        # in a one-element list shared with the nested builder.
        cursor = [-1]

        def wrap(subtree, labels):
            # Nest ``subtree`` under each label; the last label ends up
            # innermost, the first outermost.
            for sublabel in labels[::-1]:
                subtree = Tree(sublabel, [subtree])
            return subtree

        def build():
            cursor[0] += 1
            position = cursor[0]
            start, end, label_idx = p_i[position], p_j[position], p_label[position]
            label = self._label_vocab[label_idx]

            if (start + 1) < end:
                # Preorder: the left child is consumed before the right.
                left = build()
                right = build()
                children = left + right
                if not label:
                    # Unlabelled span: splice the children into the parent.
                    return children
                return [wrap(Tree(label[-1], children), label[:-1])]

            # Single-token span: build the (tag word) leaf.
            if self._provides_tags:
                word = sentence[start]
                tag = self._tag_vocab[tags[start]]
            else:
                word, tag = sentence[start]
            tag = PTB_TOKEN_ESCAPE.get(tag, tag)
            word = PTB_TOKEN_ESCAPE.get(word, word)
            return [wrap(Tree(tag, [word]), label)]

        tree = build()[0]
        tree.score = score

        return tree
Example 41
Project: self-attentive-parser   Author: nikitakit   File: nltk_plugin.py    MIT License 5 votes vote down vote up
def parse(self, sentence):
        """
        Parse a single sentence.

        ``sentence`` is wrapped in a one-element list and handed to
        ``parse_sents``; the first result is returned.  It may be a
        list of tokens, or a string, in which case the sentence will be
        tokenized using the default NLTK tokenizer.

        sentence (str or List[str]): sentence to parse

        Returns: nltk.Tree
        """
        results = list(self.parse_sents([sentence]))
        return results[0]
Example 42
Project: controcurator   Author: ControCurator   File: article.py    MIT License 5 votes vote down vote up
def ExtractNounPhrases(self, myTree, phrase):
        """
        Collect copies of every subtree of ``myTree`` labelled ``phrase``.

        ``myTree`` itself is included when its label matches.  Matches
        are copied with ``copy(True)`` and returned in preorder.

        :param myTree: the tree to search
        :param phrase: the label to look for (e.g. ``'NP'``)
        :return: list of copied matching subtrees, possibly empty
        """
        myPhrases = []
        if myTree.label() == phrase:
            myPhrases.append(myTree.copy(True))
        for child in myTree:
            # isinstance() rather than an exact type check, so children
            # that are Tree subclasses are descended into as well.
            if isinstance(child, Tree):
                myPhrases.extend(self.ExtractNounPhrases(child, phrase))
        return myPhrases
Example 43
Project: controcurator   Author: ControCurator   File: article.py    MIT License 5 votes vote down vote up
def getNamedEntities(self):
        """
        Map each named-entity phrase in ``self.tagged`` to its label.

        ``nltk.ne_chunk`` is run over ``self.tagged``; every top-level
        chunk that is exactly an ``nltk.Tree`` contributes one entry
        whose key is its leaf words joined by spaces and whose value is
        the chunk label.  A repeated phrase keeps the last label seen.

        :return: dict of entity text to entity label
        """
        entities = {}
        for chunk in nltk.ne_chunk(self.tagged):
            if type(chunk) is not nltk.Tree:
                continue
            words = [leaf[0] for leaf in chunk.leaves()]
            entities[' '.join(words)] = chunk.label()
        return entities
Example 44
Project: FancyWord   Author: EastonLee   File: regexp.py    GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, chunk_struct, debug_level=1):
        """
        Build a ``ChunkString`` that encodes the chunking of the
        top-level pieces of ``chunk_struct``.

        The root label and a slice copy of the children are stored, and
        the children's tags are rendered as ``<tag1><tag2>...`` in
        ``self._str``.

        :type chunk_struct: Tree
        :param chunk_struct: The chunk structure to be further chunked.
        :type debug_level: int
        :param debug_level: How much checking is applied to
            transformations on the ``ChunkString``:
                - 0: no checks
                - 1: full check on to_chunkstruct
                - 2: full check on to_chunkstruct and cursory check
                   after each transformation.
                - 3: full check on to_chunkstruct and full check
                   after each transformation.
            At least level 1 is recommended; level 3 is advisable when
            non-standard subclasses of ``RegexpChunkRule`` are used.
        """
        self._root_label = chunk_struct.label()
        self._pieces = chunk_struct[:]
        # One tag per top-level piece, each wrapped in angle brackets.
        joined_tags = '><'.join(self._tag(piece) for piece in self._pieces)
        self._str = '<%s>' % joined_tags
        self._debug = debug_level
Example 45
Project: FancyWord   Author: EastonLee   File: regexp.py    GNU General Public License v3.0 5 votes vote down vote up
def _tag(self, tok):
        """
        Return the tag that stands for ``tok`` in the chunk string.

        :param tok: a tagged token (tuple) or a ``Tree``
        :return: the second element of a tuple, or the label of a tree
        :raise ValueError: if ``tok`` is neither a tuple nor a ``Tree``
        """
        if isinstance(tok, tuple):
            # Tagged token: the tag is the second element.
            return tok[1]
        if isinstance(tok, Tree):
            return tok.label()
        raise ValueError('chunk structures must contain tagged '
                         'tokens or trees')
Example 46
Project: FancyWord   Author: EastonLee   File: regexp.py    GNU General Public License v3.0 5 votes vote down vote up
def to_chunkstruct(self, chunk_label='CHUNK'):
        """
        Return the chunk structure encoded by this ``ChunkString``.

        :param chunk_label: label given to each chunk subtree
        :rtype: Tree
        :raise ValueError: If a transformation has generated an
            invalid chunkstring.
        """
        if self._debug > 0:
            self._verify(self._str, 1)

        # Splitting on braces yields segments that alternate between
        # "outside a chunk" (even positions) and "inside a chunk" (odd).
        children = []
        start = 0
        for position, segment in enumerate(re.split('[{}]', self._str)):
            # Each '<' in the segment corresponds to one stored piece.
            stop = start + segment.count('<')
            tokens = self._pieces[start:stop]

            if position % 2:
                children.append(Tree(chunk_label, tokens))
            else:
                children.extend(tokens)

            start = stop

        return Tree(self._root_label, children)
Example 47
Project: FancyWord   Author: EastonLee   File: regexp.py    GNU General Public License v3.0 5 votes vote down vote up
def parse(self, chunk_struct, trace=None):
        """
        Chunk ``chunk_struct`` by applying this parser's rules.

        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
        :type trace: int
        :param trace: The tracing level used while parsing: ``0``
            produces no tracing output, ``1`` normal tracing output,
            and ``2`` or higher verbose tracing output.  When given,
            it overrides the trace level passed to the constructor.
        :rtype: Tree
        :return: a chunk structure that encodes the chunks in a given
            tagged sentence.  A chunk is a non-overlapping linguistic
            group, such as a noun phrase.  Which chunks are identified
            depends on the rules used to define this
            ``RegexpChunkParser``.
        """
        if not len(chunk_struct):
            print('Warning: parsing empty text')
            return Tree(self._root_label, [])

        # Input that has no ``label()`` method is wrapped in a Tree
        # under the parser's root label.
        try:
            chunk_struct.label()
        except AttributeError:
            chunk_struct = Tree(self._root_label, chunk_struct)

        # Fall back to the parser-wide trace level when none is given.
        effective_trace = self._trace if trace is None else trace

        chunkstr = ChunkString(chunk_struct)

        # Run every rule over the chunkstring, with or without tracing.
        if effective_trace:
            self._trace_apply(chunkstr, effective_trace > 1)
        else:
            self._notrace_apply(chunkstr)

        # Convert the transformed chunkstring back into a tree.
        return chunkstr.to_chunkstruct(self._chunk_label)
Example 48
Project: allennlp   Author: allenai   File: constituency_parser.py    Apache License 2.0 5 votes vote down vote up
def decode(self, output_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """
        Build trees from the scored spans and strip batch padding.

        The span and class-probability tensors are moved to the CPU and
        passed, with the tokens and POS tags, to ``construct_trees``.
        POS tags are replaced by ``None`` unless every entry of
        ``output_dict["pos_tags"]`` is truthy.

        ``output_dict`` is updated in place: ``"spans"`` and
        ``"class_probabilities"`` become per-instance lists truncated to
        ``num_spans[i]`` entries, and ``"trees"`` is added.  The same
        dict is returned.
        """
        class_probs = output_dict["class_probabilities"].cpu().data
        spans = output_dict["spans"].cpu().data

        sentences = output_dict["tokens"]
        pos_tags = None
        if all(output_dict["pos_tags"]):
            pos_tags = output_dict["pos_tags"]
        span_counts = output_dict["num_spans"].data
        trees = self.construct_trees(class_probs, spans, span_counts, sentences, pos_tags)

        # Keep only the first ``span_counts[i]`` (unpadded) rows of each instance.
        instance_indices = range(class_probs.size(0))
        output_dict["spans"] = [spans[index, : span_counts[index]] for index in instance_indices]
        output_dict["class_probabilities"] = [
            class_probs[index, : span_counts[index], :] for index in instance_indices
        ]

        output_dict["trees"] = trees
        return output_dict
Example 49
Project: nltk-on-gae   Author: sivu22   File: regexp.py    Apache License 2.0 5 votes vote down vote up
def __init__(self, chunk_struct, debug_level=1):
        """
        Build a ``ChunkString`` that encodes the chunking of the
        top-level pieces of ``chunk_struct``.

        The top node (``chunk_struct.node``) and a slice copy of the
        children are stored, and the children's tags are rendered as
        ``<tag1><tag2>...`` in ``self._str``.

        :type chunk_struct: Tree
        :param chunk_struct: The chunk structure to be further chunked.
        :type debug_level: int
        :param debug_level: How much checking is applied to
            transformations on the ``ChunkString``:
                - 0: no checks
                - 1: full check on to_chunkstruct
                - 2: full check on to_chunkstruct and cursory check
                   after each transformation.
                - 3: full check on to_chunkstruct and full check
                   after each transformation.
            At least level 1 is recommended; level 3 is advisable when
            non-standard subclasses of ``RegexpChunkRule`` are used.
        """
        self._top_node = chunk_struct.node
        self._pieces = chunk_struct[:]
        # One tag per top-level piece, each wrapped in angle brackets.
        joined_tags = '><'.join(self._tag(piece) for piece in self._pieces)
        self._str = '<%s>' % joined_tags
        self._debug = debug_level
Example 50
Project: nltk-on-gae   Author: sivu22   File: regexp.py    Apache License 2.0 5 votes vote down vote up
def _tag(self, tok):
        """
        Return the tag that stands for ``tok`` in the chunk string.

        :param tok: a tagged token (tuple) or a ``Tree``
        :return: the second element of a tuple, or the ``node`` of a tree
        :raise ValueError: if ``tok`` is neither a tuple nor a ``Tree``
        """
        # isinstance() replaces the comparison against the Python-2-only
        # ``types.TupleType``; it behaves the same for plain tuples on
        # Python 2, works on Python 3, and also accepts tuple subclasses
        # such as namedtuples.
        if isinstance(tok, tuple):
            return tok[1]
        elif isinstance(tok, Tree):
            return tok.node
        else:
            raise ValueError('chunk structures must contain tagged '
                             'tokens or trees')
Example 51
Project: nltk-on-gae   Author: sivu22   File: regexp.py    Apache License 2.0 5 votes vote down vote up
def to_chunkstruct(self, chunk_node='CHUNK'):
        """
        Return the chunk structure encoded by this ``ChunkString``.

        :param chunk_node: node value given to each chunk subtree
        :rtype: Tree
        :raise ValueError: If a transformation has generated an
            invalid chunkstring.
        """
        if self._debug > 0:
            self._verify(self._str, 1)

        # Splitting on braces yields segments that alternate between
        # "outside a chunk" (even positions) and "inside a chunk" (odd).
        children = []
        start = 0
        for position, segment in enumerate(re.split('[{}]', self._str)):
            # Each '<' in the segment corresponds to one stored piece.
            stop = start + segment.count('<')
            tokens = self._pieces[start:stop]

            if position % 2:
                children.append(Tree(chunk_node, tokens))
            else:
                children.extend(tokens)

            start = stop

        return Tree(self._top_node, children)
Example 52
Project: nltk-on-gae   Author: sivu22   File: regexp.py    Apache License 2.0 5 votes vote down vote up
def parse(self, chunk_struct, trace=None):
        """
        Chunk ``chunk_struct`` by applying this parser's rules.

        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
        :type trace: int
        :param trace: The tracing level used while parsing: ``0``
            produces no tracing output, ``1`` normal tracing output,
            and ``2`` or higher verbose tracing output.  When given,
            it overrides the trace level passed to the constructor.
        :rtype: Tree
        :return: a chunk structure that encodes the chunks in a given
            tagged sentence.  A chunk is a non-overlapping linguistic
            group, such as a noun phrase.  Which chunks are identified
            depends on the rules used to define this
            ``RegexpChunkParser``.
        """
        if not len(chunk_struct):
            print('Warning: parsing empty text')
            return Tree(self._top_node, [])

        # Input that has no ``node`` attribute is wrapped in a Tree
        # under the parser's top node.
        try:
            chunk_struct.node
        except AttributeError:
            chunk_struct = Tree(self._top_node, chunk_struct)

        # Fall back to the parser-wide trace level when none is given.
        effective_trace = self._trace if trace is None else trace

        chunkstr = ChunkString(chunk_struct)

        # Run every rule over the chunkstring, with or without tracing.
        if effective_trace:
            self._trace_apply(chunkstr, effective_trace > 1)
        else:
            self._notrace_apply(chunkstr)

        # Convert the transformed chunkstring back into a tree.
        return chunkstr.to_chunkstruct(self._chunk_node)
Example 53
Project: georef-ar-address   Author: datosgobar   File: address_parser.py    MIT License 5 votes vote down vote up
def _with_labels(labels):
    """Create a predicate for nltk.Tree that checks label membership.

    The labels are collected into a frozenset once, when the predicate
    is created, so ``labels`` may be any iterable (including a one-shot
    iterator) and each call is a single set lookup.

    Args:
        labels: Iterable of hashable label values.

    Returns:
        function: function that returns True if calling 'label' on its
            argument returns an object that belongs to the set of values.

    """
    allowed = frozenset(labels)
    return lambda t: t.label() in allowed
Example 54
Project: georef-ar-address   Author: datosgobar   File: address_parser.py    MIT License 5 votes vote down vote up
def __init__(self, tree):
        """Initialize a TreeVisitor object.

        The rank and the component leaf indices start out unset (None).

        Args:
            tree (nltk.Tree): Parse tree.

        """
        self._tree = tree
        self._rank = self._components_leaves_indices = None
Example 55
Project: FAParser   Author: baoy-nlp   File: helper.py    GNU General Public License v3.0 5 votes vote down vote up
def tree2list(tree, parent_arc=None):
    """Flatten ``tree`` into nested lists plus arc and tag label lists.

    Args:
        tree: an ``nltk.Tree``, or a leaf (anything else), which is
            converted with ``str``.
        parent_arc: labels of the unary chain above ``tree``.  It is
            copied, never modified, so the caller's list is left intact.

    Returns:
        tuple ``(root, arc, tag)``: ``root`` is the nested-list form of
        the tree (a string for a leaf), ``arc`` is the list of
        '+'-joined labels of the branching nodes, and ``tag`` is the
        list of '+'-joined labels above each leaf.

    Raises:
        IndexError: if a leaf is reached with an empty ``parent_arc``
            (same as the original behavior), or if a tree has no children.
    """
    # Work on a private copy: the former ``parent_arc=[]`` default was a
    # single list shared across calls, and the leaf branch edits the
    # list in place.
    parent_arc = [] if parent_arc is None else list(parent_arc)

    if isinstance(tree, nltk.Tree):
        label = tree.label()
        if isinstance(tree[0], nltk.Tree):
            # Keep only the part of the label before the first '-' or '='.
            label = re.split('-|=', tree.label())[0]
        root_arc_list = parent_arc + [label]
        root_arc = '+'.join(root_arc_list)
        if len(tree) == 1:
            # Unary node: its label is carried down to the child.
            root, arc, tag = tree2list(tree[0], parent_arc=root_arc_list)
        elif len(tree) == 2:
            c0, arc0, tag0 = tree2list(tree[0])
            c1, arc1, tag1 = tree2list(tree[1])
            root = [c0, c1]
            arc = arc0 + [root_arc] + arc1
            tag = tag0 + tag1
        else:
            # More than two children: split off the first child and
            # group the remainder under a synthetic '<empty>' node.
            c0, arc0, tag0 = tree2list(tree[0])
            c1, arc1, tag1 = tree2list(nltk.Tree('<empty>', tree[1:]))
            # NOTE(review): unless the module defines its own ``bin``,
            # this compares the builtin function to 0 and is always
            # False -- confirm the intended flag.
            if bin == 0:
                root = [c0] + c1
            else:
                root = [c0, c1]
            arc = arc0 + [root_arc] + arc1
            tag = tag0 + tag1
        return root, arc, tag
    else:
        if len(parent_arc) == 1:
            parent_arc.insert(0, '<empty>')
        # Drop the last label before joining the rest into the leaf's tag.
        del parent_arc[-1]
        return str(tree), [], ['+'.join(parent_arc)]
Example 56
Project: FAParser   Author: baoy-nlp   File: distance_helper.py    GNU General Public License v3.0 5 votes vote down vote up
def tree2list(tree, parent_arc=None):
    """Flatten ``tree`` into nested lists plus arc and tag label lists.

    Args:
        tree: an ``nltk.Tree``, or a leaf (anything else), which is
            converted with ``str``.
        parent_arc: labels of the unary chain above ``tree``.  It is
            copied, never modified, so the caller's list is left intact.

    Returns:
        tuple ``(root, arc, tag)``: ``root`` is the nested-list form of
        the tree (a string for a leaf), ``arc`` is the list of
        '+'-joined labels of the branching nodes, and ``tag`` is the
        list of '+'-joined labels above each leaf.

    Raises:
        IndexError: if a leaf is reached with an empty ``parent_arc``
            (same as the original behavior), or if a tree has no children.
    """
    # Work on a private copy: the former ``parent_arc=[]`` default was a
    # single list shared across calls, and the leaf branch edits the
    # list in place.
    parent_arc = [] if parent_arc is None else list(parent_arc)

    if isinstance(tree, nltk.Tree):
        label = tree.label()
        if isinstance(tree[0], nltk.Tree):
            # Keep only the part of the label before the first '-' or '='.
            label = re.split('-|=', tree.label())[0]
        root_arc_list = parent_arc + [label]
        root_arc = '+'.join(root_arc_list)
        if len(tree) == 1:
            # Unary node: its label is carried down to the child.
            root, arc, tag = tree2list(tree[0], parent_arc=root_arc_list)
        elif len(tree) == 2:
            c0, arc0, tag0 = tree2list(tree[0])
            c1, arc1, tag1 = tree2list(tree[1])
            root = [c0, c1]
            arc = arc0 + [root_arc] + arc1
            tag = tag0 + tag1
        else:
            # More than two children: split off the first child and
            # group the remainder under a synthetic '<empty>' node.
            c0, arc0, tag0 = tree2list(tree[0])
            c1, arc1, tag1 = tree2list(nltk.Tree('<empty>', tree[1:]))
            # NOTE(review): unless the module defines its own ``bin``,
            # this compares the builtin function to 0 and is always
            # False -- confirm the intended flag.
            if bin == 0:
                root = [c0] + c1
            else:
                root = [c0, c1]
            arc = arc0 + [root_arc] + arc1
            tag = tag0 + tag1
        return root, arc, tag
    else:
        if len(parent_arc) == 1:
            parent_arc.insert(0, '<empty>')
        # Drop the last label before joining the rest into the leaf's tag.
        del parent_arc[-1]
        return str(tree), [], ['+'.join(parent_arc)]
Example 57
Project: nlok   Author: glovguy   File: language.py    MIT License 5 votes vote down vote up
def words_in_flattened_tree(self):
        """
        Return a ``Word`` for every leaf of every top-level subtree.

        Only elements of ``self.chunkedSentence`` whose type is exactly
        ``Tree`` are considered; each is flattened and its leaves are
        wrapped in ``Word``, in order.
        """
        subtrees = [node for node in self.chunkedSentence if type(node) == Tree]
        # Collect all leaf lists first, then wrap each leaf.
        leaf_groups = [subtree.flatten().leaves() for subtree in subtrees]
        words = []
        for leaves in leaf_groups:
            for leaf in leaves:
                words.append(Word(leaf))
        return words
Example 58
Project: nlok   Author: glovguy   File: unitTests.py    MIT License 5 votes vote down vote up
def test_parse_with_grammar(self):
        """
        Parsing with a one-rule NP grammar should put a ``Tree`` at
        index 4 of the chunked sentence, starting with ('long', 'JJ').
        """
        grammar = r"""
          NP: {<DT|PP\$>?<JJ>*<NN>}
        """
        sentence = Sentence("Rapunzel let down her long golden hair.")
        mys = sentence.parse_with_grammar(grammar)
        first_leaf = mys.chunkedSentence[4][0]
        self.assertEqual(('long', 'JJ'), first_leaf)
        chunk_type = type(mys.chunkedSentence[4])
        self.assertEqual(Tree, chunk_type)
Example 59
Project: address_extraction   Author: bagrii   File: address_extract.py    MIT License 5 votes vote down vote up
def parse(self, tagged_sent):
        """
        Tag ``tagged_sent`` with ``self.tagger`` and return the result
        of ``conlltags2tree`` on the flattened triplets.

        :param tagged_sent: the sentence passed to ``self.tagger.tag``
        :return: whatever ``conlltags2tree`` builds from the triplets
        """
        # The tagger yields ((word, tag), iob) pairs; flatten each into
        # the (word, tag, iob) triplet form.
        iob_triplets = [
            (word, pos, iob)
            for (word, pos), iob in self.tagger.tag(tagged_sent)
        ]

        # Convert the triplets to nltk.Tree format.
        return conlltags2tree(iob_triplets)
Example 60
Project: luscan-devel   Author: blackye   File: regexp.py    GNU General Public License v2.0 5 votes vote down vote up
def __init__(self, chunk_struct, debug_level=1):
        """
        Build a ``ChunkString`` that encodes the chunking of the
        top-level pieces of ``chunk_struct``.

        The top node (``chunk_struct.node``) and a slice copy of the
        children are stored, and the children's tags are rendered as
        ``<tag1><tag2>...`` in ``self._str``.

        :type chunk_struct: Tree
        :param chunk_struct: The chunk structure to be further chunked.
        :type debug_level: int
        :param debug_level: How much checking is applied to
            transformations on the ``ChunkString``:
                - 0: no checks
                - 1: full check on to_chunkstruct
                - 2: full check on to_chunkstruct and cursory check
                   after each transformation.
                - 3: full check on to_chunkstruct and full check
                   after each transformation.
            At least level 1 is recommended; level 3 is advisable when
            non-standard subclasses of ``RegexpChunkRule`` are used.
        """
        self._top_node = chunk_struct.node
        self._pieces = chunk_struct[:]
        # One tag per top-level piece, each wrapped in angle brackets.
        joined_tags = '><'.join(self._tag(piece) for piece in self._pieces)
        self._str = '<%s>' % joined_tags
        self._debug = debug_level
Example 61
Project: luscan-devel   Author: blackye   File: regexp.py    GNU General Public License v2.0 5 votes vote down vote up
def _tag(self, tok):
        """
        Return the tag that stands for ``tok`` in the chunk string.

        :param tok: a tagged token (tuple) or a ``Tree``
        :return: the second element of a tuple, or the ``node`` of a tree
        :raise ValueError: if ``tok`` is neither a tuple nor a ``Tree``
        """
        # isinstance() replaces the comparison against the Python-2-only
        # ``types.TupleType``; it behaves the same for plain tuples on
        # Python 2, works on Python 3, and also accepts tuple subclasses
        # such as namedtuples.
        if isinstance(tok, tuple):
            return tok[1]
        elif isinstance(tok, Tree):
            return tok.node
        else:
            raise ValueError('chunk structures must contain tagged '
                             'tokens or trees')
Example 62
Project: luscan-devel   Author: blackye   File: regexp.py    GNU General Public License v2.0 5 votes vote down vote up
def to_chunkstruct(self, chunk_node='CHUNK'):
        """
        Return the chunk structure encoded by this ``ChunkString``.

        :param chunk_node: node value given to each chunk subtree
        :rtype: Tree
        :raise ValueError: If a transformation has generated an
            invalid chunkstring.
        """
        if self._debug > 0:
            self._verify(self._str, 1)

        # Splitting on braces yields segments that alternate between
        # "outside a chunk" (even positions) and "inside a chunk" (odd).
        children = []
        start = 0
        for position, segment in enumerate(re.split('[{}]', self._str)):
            # Each '<' in the segment corresponds to one stored piece.
            stop = start + segment.count('<')
            tokens = self._pieces[start:stop]

            if position % 2:
                children.append(Tree(chunk_node, tokens))
            else:
                children.extend(tokens)

            start = stop

        return Tree(self._top_node, children)
Example 63
Project: luscan-devel   Author: blackye   File: regexp.py    GNU General Public License v2.0 5 votes vote down vote up
def parse(self, chunk_struct, trace=None):
        """
        Chunk ``chunk_struct`` by applying this parser's rules.

        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
        :type trace: int
        :param trace: The level of tracing that should be used when
            parsing a text.  ``0`` will generate no tracing output;
            ``1`` will generate normal tracing output; and ``2`` or
            higher will generate verbose tracing output.  This value
            overrides the trace level value that was given to the
            constructor.
        :rtype: Tree
        :return: a chunk structure that encodes the chunks in a given
            tagged sentence.  A chunk is a non-overlapping linguistic
            group, such as a noun phrase.  The set of chunks
            identified in the chunk structure depends on the rules
            used to define this ``RegexpChunkParser``.
        """
        if len(chunk_struct) == 0:
            # Call form of print: a SyntaxError-free spelling on
            # Python 3 that prints the same text on Python 2.
            print('Warning: parsing empty text')
            return Tree(self._top_node, [])

        # Input that has no ``node`` attribute is wrapped in a Tree
        # under the parser's top node.
        try:
            chunk_struct.node
        except AttributeError:
            chunk_struct = Tree(self._top_node, chunk_struct)

        # Use the default trace value?
        if trace is None: trace = self._trace

        chunkstr = ChunkString(chunk_struct)

        # Apply the sequence of rules to the chunkstring.
        if trace:
            verbose = (trace>1)
            self._trace_apply(chunkstr, verbose)
        else:
            self._notrace_apply(chunkstr)

        # Use the chunkstring to create a chunk structure.
        return chunkstr.to_chunkstruct(self._chunk_node)
Example 64
Project: nlpvis   Author: shusenl   File: sen_tree_parser.py    GNU General Public License v2.0 5 votes vote down vote up
def tree2dict(tree):
    """Convert ``tree`` into a one-key dict of label to list of children.

    Children that are ``Tree`` instances are converted recursively;
    any other child is kept as-is.
    """
    label = tree.label()
    children = []
    for child in tree:
        if isinstance(child, Tree):
            children.append(tree2dict(child))
        else:
            children.append(child)
    return {label: children}
Example 65
Project: tetre   Author: aoldoni   File: tree_utils.py    MIT License 5 votes vote down vote up
def to_nltk_tree(node):
    """Convert a Spacy dependency node into an NLTK tree (or a string).

    Every node is represented by the text "dep_/orth_/pos_" built from
    the Spacy node attributes of the same names.

    Args:
        node: The starting node from the tree in which the transformation will occur.

    Returns:
        A NLTK Tree (nltk.tree) labelled with that text when the node
        has children (n_lefts + n_rights > 0), otherwise just the text.
    """
    has_children = node.n_lefts + node.n_rights > 0
    text = "/".join((node.dep_, node.orth_, node.pos_))
    if not has_children:
        return text
    return Tree(text, [to_nltk_tree(child) for child in node.children])
Example 66
Project: allennlp-semparse   Author: allenai   File: world.py    Apache License 2.0 5 votes vote down vote up
def get_logical_form(self, action_sequence: List[str], add_var_function: bool = True) -> str:
        """
        Reconstructs a logical form from an action sequence, e.g. one decoded by a transition
        based semantic parser.

        Parameters
        ----------
        action_sequence : ``List[str]``
            The sequence of actions as strings (eg.: ``['{START_SYMBOL} -> t', 't -> <e,t>', ...]``).
        add_var_function : ``bool`` (optional)
             ``var`` is a special function that some languages use within lambda functions to
             indicate the use of a variable (eg.: ``(lambda x (fb:row.row.year (var x)))``). Due to
             the way constrained decoding is currently implemented, it is easier for the decoder to
             not produce these functions. In that case, setting this flag adds the function in the
             logical form even though it is not present in the action sequence.

        Raises
        ------
        ParsingError
            If building the tree fails, or if actions are left over once the tree is complete.
        """
        # The bracketing found in the right-hand side of each action is assumed to be the correct
        # bracketing of the logical form (true when the action sequence contains no currying).
        # So: build a tree from the actions, then print its leaves with brackets around the
        # children of every non-terminal.
        productions = [action.split(" -> ") for action in action_sequence]
        root = Tree(productions[0][1], [])

        try:
            leftover = self._construct_node_from_actions(root, productions[1:], add_var_function)
        except ParsingError:
            logger.error("Error parsing action sequence: %s", action_sequence)
            raise

        if not leftover:
            return nltk_tree_to_logical_form(root)

        logger.error("Error parsing action sequence: %s", action_sequence)
        logger.error("Remaining actions were: %s", leftover)
        raise ParsingError("Extra actions in action sequence")
Example 67
Project: allennlp-semparse   Author: allenai   File: domain_language.py    Apache License 2.0 5 votes vote down vote up
def action_sequence_to_logical_form(self, action_sequence: List[str]) -> str:
        """
        Reconstructs the logical form described by an action sequence as produced by
        :func:`logical_form_to_action_sequence`, i.e. a linearization of an abstract syntax tree.

        Raises ``ParsingError`` if building the tree fails or if actions are left over once the
        tree is complete.
        """
        # The bracketing found in the right-hand side of each action is assumed to be the correct
        # bracketing of the logical form (true when the action sequence contains no currying).
        # So: build a tree from the actions, then print its leaves with brackets around the
        # children of every non-terminal.
        productions = [action.split(" -> ") for action in action_sequence]
        root = Tree(productions[0][1], [])

        try:
            leftover = self._construct_node_from_actions(root, productions[1:])
        except ParsingError:
            logger.error("Error parsing action sequence: %s", action_sequence)
            raise

        if not leftover:
            return nltk_tree_to_logical_form(root)

        logger.error("Error parsing action sequence: %s", action_sequence)
        logger.error("Remaining actions were: %s", leftover)
        raise ParsingError("Extra actions in action sequence")
Example 68
Project: honours_project   Author: JFriel   File: regexp.py    GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, chunk_struct, debug_level=1):
        """
        Build a ``ChunkString`` encoding the chunking of the given
        chunk structure.

        :type chunk_struct: Tree
        :param chunk_struct: The chunk structure to be further chunked.
        :type debug_level: int
        :param debug_level: How much checking is applied to
            transformations on the ``ChunkString``:
                - 0: no checks
                - 1: full check on to_chunkstruct
                - 2: full check on to_chunkstruct and cursory check after
                   each transformation.
                - 3: full check on to_chunkstruct and full check after
                   each transformation.
            At least level 1 is recommended; level 3 is advisable when
            non-standard subclasses of ``RegexpChunkRule`` are in use.
        """
        self._root_label = chunk_struct.label()
        # Shallow copy of the top-level children (tagged tokens or subtrees).
        self._pieces = chunk_struct[:]
        # String encoding: one ``<tag>`` per top-level piece.
        self._str = '<%s>' % '><'.join(self._tag(piece) for piece in self._pieces)
        self._debug = debug_level
Example 69
Project: honours_project   Author: JFriel   File: regexp.py    GNU General Public License v3.0 5 votes vote down vote up
def _tag(self, tok):
        """
        Return the tag used to encode ``tok``: the second element of a
        tuple, or the label of a ``Tree``.

        :raise ValueError: If ``tok`` is neither a tuple nor a ``Tree``.
        """
        if isinstance(tok, tuple):
            return tok[1]
        if isinstance(tok, Tree):
            return tok.label()
        raise ValueError('chunk structures must contain tagged '
                         'tokens or trees')
Example 70
Project: honours_project   Author: JFriel   File: regexp.py    GNU General Public License v3.0 5 votes vote down vote up
def to_chunkstruct(self, chunk_label='CHUNK'):
        """
        Decode this ``ChunkString`` back into a chunk structure.

        :param chunk_label: The label given to every chunk subtree.
        :rtype: Tree
        :raise ValueError: If a transformation has generated an
            invalid chunkstring.
        """
        if self._debug > 0: self._verify(self._str, 1)

        # Splitting on braces yields alternating segments: even positions
        # lie outside any chunk, odd positions lie inside one.
        children = []
        start = 0
        for position, segment in enumerate(re.split('[{}]', self._str)):
            # Each '<' in the segment stands for one stored piece.
            count = segment.count('<')
            covered = self._pieces[start:start+count]
            start += count

            if position % 2 == 1:
                children.append(Tree(chunk_label, covered))
            else:
                children.extend(covered)

        return Tree(self._root_label, children)
Example 71
Project: honours_project   Author: JFriel   File: regexp.py    GNU General Public License v3.0 5 votes vote down vote up
def parse(self, chunk_struct, trace=None):
        """
        Chunk the given structure by applying this parser's rules.

        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
        :type trace: int
        :param trace: The level of tracing that should be used when
            parsing a text.  ``0`` will generate no tracing output;
            ``1`` will generate normal tracing output; and ``2`` or
            higher will generate verbose tracing output.  This value
            overrides the trace level value that was given to the
            constructor.
        :rtype: Tree
        :return: a chunk structure that encodes the chunks in a given
            tagged sentence.  A chunk is a non-overlapping linguistic
            group, such as a noun phrase.  The set of chunks
            identified in the chunk structure depends on the rules
            used to define this ``RegexpChunkParser``.
        """
        if len(chunk_struct) == 0:
            print('Warning: parsing empty text')
            return Tree(self._root_label, [])

        # Input without a label() method is wrapped in a Tree under the
        # root label.
        try:
            chunk_struct.label()
        except AttributeError:
            chunk_struct = Tree(self._root_label, chunk_struct)

        # Fall back to the constructor's trace level when none is given.
        level = self._trace if trace is None else trace

        encoded = ChunkString(chunk_struct)

        # Run the rule sequence over the chunkstring, tracing if requested.
        if not level:
            self._notrace_apply(encoded)
        else:
            self._trace_apply(encoded, level > 1)

        # Decode the chunkstring back into a chunk structure.
        return encoded.to_chunkstruct(self._chunk_label)
Example 72
Project: honours_project   Author: JFriel   File: regexp.py    GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, chunk_struct, debug_level=1):
        """
        Build a ``ChunkString`` encoding the chunking of the given
        chunk structure.

        :type chunk_struct: Tree
        :param chunk_struct: The chunk structure to be further chunked.
        :type debug_level: int
        :param debug_level: How much checking is applied to
            transformations on the ``ChunkString``:
                - 0: no checks
                - 1: full check on to_chunkstruct
                - 2: full check on to_chunkstruct and cursory check after
                   each transformation.
                - 3: full check on to_chunkstruct and full check after
                   each transformation.
            At least level 1 is recommended; level 3 is advisable when
            non-standard subclasses of ``RegexpChunkRule`` are in use.
        """
        self._root_label = chunk_struct.label()
        # Shallow copy of the top-level children (tagged tokens or subtrees).
        self._pieces = chunk_struct[:]
        # String encoding: one ``<tag>`` per top-level piece.
        self._str = '<%s>' % '><'.join(self._tag(piece) for piece in self._pieces)
        self._debug = debug_level
Example 73
Project: honours_project   Author: JFriel   File: regexp.py    GNU General Public License v3.0 5 votes vote down vote up
def _tag(self, tok):
        """
        Return the tag used to encode ``tok``: the second element of a
        tuple, or the label of a ``Tree``.

        :raise ValueError: If ``tok`` is neither a tuple nor a ``Tree``.
        """
        if isinstance(tok, tuple):
            return tok[1]
        if isinstance(tok, Tree):
            return tok.label()
        raise ValueError('chunk structures must contain tagged '
                         'tokens or trees')
Example 74
Project: honours_project   Author: JFriel   File: regexp.py    GNU General Public License v3.0 5 votes vote down vote up
def to_chunkstruct(self, chunk_label='CHUNK'):
        """
        Decode this ``ChunkString`` back into a chunk structure.

        :param chunk_label: The label given to every chunk subtree.
        :rtype: Tree
        :raise ValueError: If a transformation has generated an
            invalid chunkstring.
        """
        if self._debug > 0: self._verify(self._str, 1)

        # Splitting on braces yields alternating segments: even positions
        # lie outside any chunk, odd positions lie inside one.
        children = []
        start = 0
        for position, segment in enumerate(re.split('[{}]', self._str)):
            # Each '<' in the segment stands for one stored piece.
            count = segment.count('<')
            covered = self._pieces[start:start+count]
            start += count

            if position % 2 == 1:
                children.append(Tree(chunk_label, covered))
            else:
                children.extend(covered)

        return Tree(self._root_label, children)
Example 75
Project: honours_project   Author: JFriel   File: regexp.py    GNU General Public License v3.0 5 votes vote down vote up
def parse(self, chunk_struct, trace=None):
        """
        Chunk the given structure by applying this parser's rules.

        :type chunk_struct: Tree
        :param chunk_struct: the chunk structure to be (further) chunked
        :type trace: int
        :param trace: The level of tracing that should be used when
            parsing a text.  ``0`` will generate no tracing output;
            ``1`` will generate normal tracing output; and ``2`` or
            higher will generate verbose tracing output.  This value
            overrides the trace level value that was given to the
            constructor.
        :rtype: Tree
        :return: a chunk structure that encodes the chunks in a given
            tagged sentence.  A chunk is a non-overlapping linguistic
            group, such as a noun phrase.  The set of chunks
            identified in the chunk structure depends on the rules
            used to define this ``RegexpChunkParser``.
        """
        if len(chunk_struct) == 0:
            print('Warning: parsing empty text')
            return Tree(self._root_label, [])

        # Input without a label() method is wrapped in a Tree under the
        # root label.
        try:
            chunk_struct.label()
        except AttributeError:
            chunk_struct = Tree(self._root_label, chunk_struct)

        # Fall back to the constructor's trace level when none is given.
        level = self._trace if trace is None else trace

        encoded = ChunkString(chunk_struct)

        # Run the rule sequence over the chunkstring, tracing if requested.
        if not level:
            self._notrace_apply(encoded)
        else:
            self._trace_apply(encoded, level > 1)

        # Decode the chunkstring back into a chunk structure.
        return encoded.to_chunkstruct(self._chunk_label)
Example 76
Project: allennlp   Author: allenai   File: constituency_parser.py    Apache License 2.0 4 votes vote down vote up
def _build_hierplane_tree(self, tree: Tree, index: int, is_root: bool) -> JsonDict:
        """
        Recursively converts an NLTK ``Tree`` into a JSON dictionary that can be
        rendered by the `Hierplane library<https://allenai.github.io/hierplane/>`.

        Parameters
        ----------
        tree : ``Tree``, required.
            The tree to convert into Hierplane JSON.
        index : int, required.
            The character index into the tree, used for creating spans.
        is_root : bool
            When true, the node is wrapped in the outer Hierplane JSON which
            is required for rendering.

        Returns
        -------
        A JSON dictionary render-able by Hierplane for the given tree.
        """
        subtrees = []
        for node in tree:
            if not isinstance(node, Tree):
                # NLTK leaves are plain strings: advance the character
                # index by the length of the word.
                index += len(node)
                continue
            # A Tree child has children of its own, so recurse into it.
            subtrees.append(self._build_hierplane_tree(node, index, is_root=False))

        node_type = tree.label()
        text = " ".join(tree.leaves())
        result = {"word": text, "nodeType": node_type, "attributes": [node_type], "link": node_type}
        if subtrees:
            result["children"] = subtrees
        # TODO(Mark): Figure out how to span highlighting to the leaves.
        if not is_root:
            return result
        return {
            "linkNameToLabel": LINK_TO_LABEL,
            "nodeTypeToStyle": NODE_TYPE_TO_STYLE,
            "text": text,
            "root": result,
        }
Example 77
Project: FAParser   Author: baoy-nlp   File: helper.py    GNU General Public License v3.0 4 votes vote down vote up
def build_nltktree(depth, arc, tag, sen, arcdict, tagdict, stagdict, stags=None):
    """Recursively build an ``nltk.Tree`` for ``sen``.

    The sentence is split after the position where ``depth`` is largest; both
    halves are built recursively and joined under the chain of labels looked
    up in ``arcdict`` for that position. A single word is wrapped in the chain
    of labels looked up in ``tagdict``. Label chains are ``'+'``-separated
    strings. Subtrees labelled ``'<empty>'`` are dissolved and their children
    spliced into the parent.

    stags are the stanford predicted tags present in the train/valid/test
    files; when given, the tag for a word becomes the innermost label around
    it, with the unary chain left on top.
    """
    assert len(sen) > 0
    assert len(depth) == len(sen) - 1, ("%s_%s" % (len(depth), len(sen)))
    if stags:
        assert len(stags) == len(tag)

    if len(sen) == 1:
        # Innermost label first, so the loop below wraps from the inside out.
        labels = str(tagdict[tag[0]]).split('+')[::-1]
        if stags is not None:
            assert len(stags) > 0
            labels.insert(0, str(stagdict[stags[0]]))
        subtree = str(sen[0])
        for label in labels:
            subtree = nltk.Tree(label, [subtree])
        assert isinstance(subtree, nltk.Tree)
        return subtree

    split = numpy.argmax(depth)
    left = build_nltktree(
        depth[:split], arc[:split], tag[:split + 1], sen[:split + 1],
        arcdict, tagdict, stagdict, stags[:split + 1] if stags else None)
    right = build_nltktree(
        depth[split + 1:], arc[split + 1:], tag[split + 1:], sen[split + 1:],
        arcdict, tagdict, stagdict, stags[split + 1:] if stags else None)

    # An '<empty>' subtree contributes its children directly; any other
    # subtree is kept as a single child.
    children = []
    for side in (left, right):
        if side.label() == '<empty>':
            children.extend(side)
        else:
            children.append(side)

    # Wrap the children in the arc's label chain, innermost label first.
    node = children
    for label in reversed(str(arcdict[arc[split]]).split('+')):
        node = nltk.Tree(label, [node] if isinstance(node, nltk.Tree) else node)

    return node
Example 78
Project: news-audit   Author: clips   File: source_checker.py    GNU General Public License v3.0 4 votes vote down vote up
def get_queries(self):

		"""Extract search queries from ``self.text``.

		The text is broken into n-grams of ``self.span`` tokens. An n-gram is
		kept only if its share of stopwords and (for English) its share of
		named-entity tokens are both below ``self.threshold``, and if its
		reconstructed string occurs verbatim in ``self.text``. An evenly
		spaced sample of the kept n-grams is returned.

		Returns:
			A list of query strings: every ``len(queries) // self.max_queries``-th
			candidate, with the step never smaller than 1.
		"""

		text = self.text
		# Replace quote characters that touch a non-space character with
		# BEGQ/ENDQ markers (presumably so the tokenizer keeps them attached;
		# reconstruct_ngram is expected to undo this -- TODO confirm).
		beg_quotes = re.findall(r'\"\S', text)
		for each in beg_quotes:
			text = text.replace(each, 'BEGQ' + each[-1])

		end_quotes = re.findall(r'\S\"', text)
		for each in end_quotes:
			text = text.replace(each, each[0] + 'ENDQ')

		# Collapse runs of markers and protect double dashes.
		text = re.sub('(ENDQ)+', 'ENDQ', text)
		text = re.sub('(BEGQ)+', 'BEGQ', text)
		text = text.replace('--', 'DOUBLEDASH')

		all_ngrams = ngrams(text, n = self.span, punctuation = "", continuous = True)
		# A set gives O(1) membership tests in the per-token loop below.
		if self.language in stopwords.fileids():
			stop_words = set(stopwords.words(self.language))
		else:
			stop_words = set()
		queries = []
		for ngram in all_ngrams:
			if not ngram:
				# Nothing to score; also avoids dividing by zero below.
				continue
			num_stop = sum(1 for w in ngram if w in stop_words)
			stop_score = float(num_stop)/len(ngram)
			if self.language == 'english':
				chunked = ne_chunk(pos_tag(ngram))
				named_entities = [[w for w, t in elt] for elt in chunked if isinstance(elt, nltk.Tree)]
				num_ent = sum(len(ent_list) for ent_list in named_entities)
				ent_score = float(num_ent)/len(ngram)
			else:
				ent_score = 0

			if stop_score < self.threshold and ent_score < self.threshold:
				r_string = self.reconstruct_ngram(ngram)
				if r_string in self.text:
					queries.append(r_string)

		# The slice step must be a positive integer. True division yields a
		# float on Python 3 (TypeError), and the floor is 0 whenever there are
		# fewer candidates than max_queries (ValueError), so floor-divide and
		# clamp to 1.
		reduction = max(1, len(queries) // self.max_queries)
		return queries[0::reduction]
Example 79
Project: tetre   Author: aoldoni   File: rule_applier.py    MIT License 4 votes vote down vote up
def apply(self, nltk_tree, spacy_tree, tree_root=""):
        """Run every registered rule and rebuild the NLTK tree from the result.

        Args:
            nltk_tree: The tree in the NLTK structure that represents the grouping.
            spacy_tree: The actual TreeNode in which the rules will be extracted from, rooted at the word being
                searched for.
            tree_root: A string containing the dependency tree tag of the immediate child of the word being
                searched for that will serve as the new root. This new root is used when the rules are applied in
                the child nodes, mostly obj and subj. When empty, ``spacy_tree`` itself is the root.

        Returns:
            t: The new NLTK tree after rule application.
            applied: A list with the method signatures of the applied rules.
        """
        # Anything without a label() method is represented by its string form.
        try:
            root = nltk_tree.label()
        except AttributeError:
            root = str(nltk_tree)

        # NOTE(review): on Python 3 plain strings also expose __iter__, so a
        # string input is expanded into its characters here -- confirm intent.
        node_set = list(nltk_tree) if hasattr(nltk_tree, '__iter__') else []

        if tree_root == "":
            root_spacy_tree = spacy_tree
        else:
            # Pick the child whose dependency tag contains tree_root; when
            # several match, the last one wins. None if there is no match.
            root_spacy_tree = None
            for child in spacy_tree.children:
                if tree_root in child.dep_:
                    root_spacy_tree = child

        applied = []
        rules = self.get_rules() if root_spacy_tree is not None else ()
        for rule in rules:
            root, node_set, _, is_applied = rule(self, root, node_set, root_spacy_tree)
            if not is_applied:
                continue
            # Trim the function repr down to the text between "<function "
            # and " at".
            signature = str(rule).replace("<function ", "")
            applied.append(signature[:signature.find(" at")])

        return Tree(root, sorted(node_set)), applied
Example 80
Project: allennlp-semparse   Author: allenai   File: domain_language.py    Apache License 2.0 4 votes vote down vote up
def _construct_node_from_actions(
        self, current_node: Tree, remaining_actions: List[List[str]]
    ) -> List[List[str]]:
        """
        Fills in the children of ``current_node`` by consuming actions from the front of
        ``remaining_actions``, and returns the actions that are left afterwards.

        For example, given a node of type ``c`` and an action sequence starting with
        ``c -> [<r,c>, r]``, two children are added to the node: one of type ``<r,c>`` and one of
        type ``r``, each filled in recursively from the following actions.  Action sequences are
        assumed to be produced `depth-first`, so every action for the subtree under ``<r,c>``
        comes before any action for the subtree under ``r``.  Whatever remains after both
        subtrees have terminated in leaf nodes is returned.

        The first action is popped from ``remaining_actions`` in place.  ``ParsingError`` is
        raised when no actions are left, or when the next action's left-hand side does not match
        the label of ``current_node``.
        """
        if not remaining_actions:
            logger.error("No actions left to construct current node: %s", current_node)
            raise ParsingError("Incomplete action sequence")

        lhs, rhs = remaining_actions.pop(0)
        if lhs != current_node.label():
            logger.error("Current node: %s", current_node)
            logger.error("Next action: %s -> %s", lhs, rhs)
            logger.error("Remaining actions were: %s", remaining_actions)
            raise ParsingError("Current node does not match next action")

        if rhs[0] != "[":
            # The current node is a pre-terminal and gets a single terminal child.  By
            # construction, right-hand sides are only ever terminal productions or lists of
            # non-terminals.  (A child is added to an nltk.Tree with `append`.)
            current_node.append(Tree(rhs, []))
            return remaining_actions

        # Non-terminal expansion with a bracketed list of child types.  Every child in such a
        # list is assumed to be a non-terminal, so each one is expanded recursively.
        for child_type in rhs[1:-1].split(", "):
            child = Tree(child_type, [])
            current_node.append(child)
            remaining_actions = self._construct_node_from_actions(child, remaining_actions)
        return remaining_actions