Python nltk.compat.text_type() Examples

The following are code examples for showing how to use nltk.compat.text_type(). They are from open source Python projects. You can vote up the examples you like or vote down the ones you don't like.

Example 1
Project: razzy-spinner   Author: rafasashi   File: text.py    GNU General Public License v3.0 6 votes vote down vote up
def __init__(self, tokens, name=None):
        """
        Build a Text object around a sequence of tokens.

        When ``name`` is falsy a name is derived from the tokens: if a
        ``']'`` token occurs among the first 20 tokens, the name is the
        tokens from index 1 up to (not including) that bracket, joined by
        spaces; otherwise it is the first 8 tokens joined by spaces and
        followed by ``"..."``.

        :param tokens: The source text.
        :type tokens: sequence of str
        :param name: Optional explicit name for the text.
        """
        tokens = list(tokens) if self._COPY_TOKENS else tokens
        self.tokens = tokens

        if name:
            self.name = name
            return

        # Only the leading tokens are inspected for a bracketed title.
        head = tokens[:20]
        if ']' in head:
            closing = head.index(']')
            title_tokens = tokens[1:closing]
            self.name = " ".join(text_type(tok) for tok in title_tokens)
        else:
            preview = " ".join(text_type(tok) for tok in tokens[:8])
            self.name = preview + "..."

    #////////////////////////////////////////////////////////////
    # Support item & slice access
    #//////////////////////////////////////////////////////////// 
Example 2
Project: razzy-spinner   Author: rafasashi   File: hunpos.py    GNU General Public License v3.0 6 votes vote down vote up
def tag(self, tokens):
        """Tag one sentence, given as a list of words.

        Every token is written on its own line to the hunpos process,
        followed by an empty line; one output line is then read back per
        token.  Tokens must not contain newline characters.

        :param tokens: the words of the sentence
        :return: a list of ``(token, tag)`` pairs, where ``tag`` is the
            second tab-separated field of the matching output line (as
            bytes) or None when that line has no second field
        """
        to_hunpos = self._hunpos.stdin
        for token in tokens:
            assert "\n" not in token, "Tokens should not contain newlines"
            if isinstance(token, compat.text_type):
                token = token.encode(self._encoding)
            to_hunpos.write(token + b"\n")
        # An empty line tells hunpos that the sentence is complete.
        to_hunpos.write(b"\n")
        to_hunpos.flush()

        from_hunpos = self._hunpos.stdout
        tagged_tokens = []
        for token in tokens:
            fields = from_hunpos.readline().strip().split(b"\t")
            if len(fields) > 1:
                tagged_tokens.append((token, fields[1]))
            else:
                tagged_tokens.append((token, None))
        # Consume (and discard) the empty line that ends the output.
        from_hunpos.readline()

        return tagged_tokens

# skip doctests if Hunpos tagger is not installed 
Example 3
Project: OpenBottle   Author: xiaozhuchacha   File: text.py    MIT License 6 votes vote down vote up
def __init__(self, tokens, name=None):
        """
        Build a Text object around a sequence of tokens.

        When ``name`` is falsy a name is derived from the tokens: if a
        ``']'`` token occurs among the first 20 tokens, the name is the
        tokens from index 1 up to (not including) that bracket, joined by
        spaces; otherwise it is the first 8 tokens joined by spaces and
        followed by ``"..."``.

        :param tokens: The source text.
        :type tokens: sequence of str
        :param name: Optional explicit name for the text.
        """
        tokens = list(tokens) if self._COPY_TOKENS else tokens
        self.tokens = tokens

        if name:
            self.name = name
            return

        # Only the leading tokens are inspected for a bracketed title.
        head = tokens[:20]
        if ']' in head:
            closing = head.index(']')
            title_tokens = tokens[1:closing]
            self.name = " ".join(text_type(tok) for tok in title_tokens)
        else:
            preview = " ".join(text_type(tok) for tok in tokens[:8])
            self.name = preview + "..."

    #////////////////////////////////////////////////////////////
    # Support item & slice access
    #//////////////////////////////////////////////////////////// 
Example 4
Project: OpenBottle   Author: xiaozhuchacha   File: hunpos.py    MIT License 6 votes vote down vote up
def tag(self, tokens):
        """Tag one sentence, given as a list of words.

        Every token is written on its own line to the hunpos process,
        followed by an empty line; one output line is then read back per
        token.  Tokens must not contain newline characters.

        :param tokens: the words of the sentence
        :return: a list of ``(token, tag)`` pairs, where ``tag`` is the
            second tab-separated field of the matching output line (as
            bytes) or None when that line has no second field
        """
        to_hunpos = self._hunpos.stdin
        for token in tokens:
            assert "\n" not in token, "Tokens should not contain newlines"
            if isinstance(token, compat.text_type):
                token = token.encode(self._encoding)
            to_hunpos.write(token + b"\n")
        # An empty line tells hunpos that the sentence is complete.
        to_hunpos.write(b"\n")
        to_hunpos.flush()

        from_hunpos = self._hunpos.stdout
        tagged_tokens = []
        for token in tokens:
            fields = from_hunpos.readline().strip().split(b"\t")
            if len(fields) > 1:
                tagged_tokens.append((token, fields[1]))
            else:
                tagged_tokens.append((token, None))
        # Consume (and discard) the empty line that ends the output.
        from_hunpos.readline()

        return tagged_tokens

# skip doctests if Hunpos tagger is not installed 
Example 5
Project: OpenBottle   Author: xiaozhuchacha   File: hunpos.py    MIT License 6 votes vote down vote up
def tag(self, tokens):
        """Tag one sentence, given as a list of words.

        Every token is written on its own line to the hunpos process,
        followed by an empty line; one output line is then read back per
        token.  Tokens must not contain newline characters.

        :param tokens: the words of the sentence
        :return: a list of ``(token, tag)`` pairs, where ``tag`` is the
            second tab-separated field of the matching output line (as
            bytes) or None when that line has no second field
        """
        to_hunpos = self._hunpos.stdin
        for token in tokens:
            assert "\n" not in token, "Tokens should not contain newlines"
            if isinstance(token, compat.text_type):
                token = token.encode(self._encoding)
            to_hunpos.write(token + b"\n")
        # An empty line tells hunpos that the sentence is complete.
        to_hunpos.write(b"\n")
        to_hunpos.flush()

        from_hunpos = self._hunpos.stdout
        tagged_tokens = []
        for token in tokens:
            fields = from_hunpos.readline().strip().split(b"\t")
            if len(fields) > 1:
                tagged_tokens.append((token, fields[1]))
            else:
                tagged_tokens.append((token, None))
        # Consume (and discard) the empty line that ends the output.
        from_hunpos.readline()

        return tagged_tokens

# skip doctests if Hunpos tagger is not installed 
Example 6
Project: FancyWord   Author: EastonLee   File: text.py    GNU General Public License v3.0 6 votes vote down vote up
def __init__(self, tokens, name=None):
        """
        Build a Text object around a sequence of tokens.

        When ``name`` is falsy a name is derived from the tokens: if a
        ``']'`` token occurs among the first 20 tokens, the name is the
        tokens from index 1 up to (not including) that bracket, joined by
        spaces; otherwise it is the first 8 tokens joined by spaces and
        followed by ``"..."``.

        :param tokens: The source text.
        :type tokens: sequence of str
        :param name: Optional explicit name for the text.
        """
        tokens = list(tokens) if self._COPY_TOKENS else tokens
        self.tokens = tokens

        if name:
            self.name = name
            return

        # Only the leading tokens are inspected for a bracketed title.
        head = tokens[:20]
        if ']' in head:
            closing = head.index(']')
            title_tokens = tokens[1:closing]
            self.name = " ".join(text_type(tok) for tok in title_tokens)
        else:
            preview = " ".join(text_type(tok) for tok in tokens[:8])
            self.name = preview + "..."

    #////////////////////////////////////////////////////////////
    # Support item & slice access
    #//////////////////////////////////////////////////////////// 
Example 7
Project: FancyWord   Author: EastonLee   File: hunpos.py    GNU General Public License v3.0 6 votes vote down vote up
def tag(self, tokens):
        """Tag one sentence, given as a list of words.

        Every token is written on its own line to the hunpos process,
        followed by an empty line; one output line is then read back per
        token.  Tokens must not contain newline characters.

        :param tokens: the words of the sentence
        :return: a list of ``(token, tag)`` pairs, where ``tag`` is the
            second tab-separated field of the matching output line (as
            bytes) or None when that line has no second field
        """
        to_hunpos = self._hunpos.stdin
        for token in tokens:
            assert "\n" not in token, "Tokens should not contain newlines"
            if isinstance(token, compat.text_type):
                token = token.encode(self._encoding)
            to_hunpos.write(token + b"\n")
        # An empty line tells hunpos that the sentence is complete.
        to_hunpos.write(b"\n")
        to_hunpos.flush()

        from_hunpos = self._hunpos.stdout
        tagged_tokens = []
        for token in tokens:
            fields = from_hunpos.readline().strip().split(b"\t")
            if len(fields) > 1:
                tagged_tokens.append((token, fields[1]))
            else:
                tagged_tokens.append((token, None))
        # Consume (and discard) the empty line that ends the output.
        from_hunpos.readline()

        return tagged_tokens

# skip doctests if Hunpos tagger is not installed 
Example 8
Project: honours_project   Author: JFriel   File: text.py    GNU General Public License v3.0 6 votes vote down vote up
def __init__(self, tokens, name=None):
        """
        Build a Text object around a sequence of tokens.

        When ``name`` is falsy a name is derived from the tokens: if a
        ``']'`` token occurs among the first 20 tokens, the name is the
        tokens from index 1 up to (not including) that bracket, joined by
        spaces; otherwise it is the first 8 tokens joined by spaces and
        followed by ``"..."``.

        :param tokens: The source text.
        :type tokens: sequence of str
        :param name: Optional explicit name for the text.
        """
        tokens = list(tokens) if self._COPY_TOKENS else tokens
        self.tokens = tokens

        if name:
            self.name = name
            return

        # Only the leading tokens are inspected for a bracketed title.
        head = tokens[:20]
        if ']' in head:
            closing = head.index(']')
            title_tokens = tokens[1:closing]
            self.name = " ".join(text_type(tok) for tok in title_tokens)
        else:
            preview = " ".join(text_type(tok) for tok in tokens[:8])
            self.name = preview + "..."

    #////////////////////////////////////////////////////////////
    # Support item & slice access
    #//////////////////////////////////////////////////////////// 
Example 9
Project: honours_project   Author: JFriel   File: hunpos.py    GNU General Public License v3.0 6 votes vote down vote up
def tag(self, tokens):
        """Tag one sentence, given as a list of words.

        Every token is written on its own line to the hunpos process,
        followed by an empty line; one output line is then read back per
        token.  Tokens must not contain newline characters.

        :param tokens: the words of the sentence
        :return: a list of ``(token, tag)`` pairs, where ``tag`` is the
            second tab-separated field of the matching output line (as
            bytes) or None when that line has no second field
        """
        to_hunpos = self._hunpos.stdin
        for token in tokens:
            assert "\n" not in token, "Tokens should not contain newlines"
            if isinstance(token, compat.text_type):
                token = token.encode(self._encoding)
            to_hunpos.write(token + b"\n")
        # An empty line tells hunpos that the sentence is complete.
        to_hunpos.write(b"\n")
        to_hunpos.flush()

        from_hunpos = self._hunpos.stdout
        tagged_tokens = []
        for token in tokens:
            fields = from_hunpos.readline().strip().split(b"\t")
            if len(fields) > 1:
                tagged_tokens.append((token, fields[1]))
            else:
                tagged_tokens.append((token, None))
        # Consume (and discard) the empty line that ends the output.
        from_hunpos.readline()

        return tagged_tokens

# skip doctests if Hunpos tagger is not installed 
Example 10
Project: honours_project   Author: JFriel   File: text.py    GNU General Public License v3.0 6 votes vote down vote up
def __init__(self, tokens, name=None):
        """
        Build a Text object around a sequence of tokens.

        When ``name`` is falsy a name is derived from the tokens: if a
        ``']'`` token occurs among the first 20 tokens, the name is the
        tokens from index 1 up to (not including) that bracket, joined by
        spaces; otherwise it is the first 8 tokens joined by spaces and
        followed by ``"..."``.

        :param tokens: The source text.
        :type tokens: sequence of str
        :param name: Optional explicit name for the text.
        """
        tokens = list(tokens) if self._COPY_TOKENS else tokens
        self.tokens = tokens

        if name:
            self.name = name
            return

        # Only the leading tokens are inspected for a bracketed title.
        head = tokens[:20]
        if ']' in head:
            closing = head.index(']')
            title_tokens = tokens[1:closing]
            self.name = " ".join(text_type(tok) for tok in title_tokens)
        else:
            preview = " ".join(text_type(tok) for tok in tokens[:8])
            self.name = preview + "..."

    #////////////////////////////////////////////////////////////
    # Support item & slice access
    #//////////////////////////////////////////////////////////// 
Example 11
Project: razzy-spinner   Author: rafasashi   File: stanford.py    GNU General Public License v3.0 5 votes vote down vote up
def tag_sents(self, sentences):
        """
        Tag several sentences with one run of the external Java tagger.

        The sentences are written to a temporary file (tokens joined by
        single spaces, sentences by newlines), the tagger is run through
        ``java`` and its decoded output is handed to ``self.parse_output``.
        The temporary file is removed and the Java options are restored
        even when writing the input or running the tagger raises.

        :param sentences: the sentences to tag, each an iterable of tokens
        :return: the result of ``self.parse_output`` for the tagger output
        """
        encoding = self._encoding
        default_options = ' '.join(_java_options)
        config_java(options=self.java_options, verbose=False)

        try:
            # Create a temporary input file
            _input_fh, self._input_file_path = tempfile.mkstemp(text=True)
            try:
                # Wrapping the descriptor immediately guarantees it gets closed.
                with os.fdopen(_input_fh, 'wb') as input_file:
                    # self._cmd is read only after _input_file_path is set,
                    # exactly as before (presumably it refers to that path;
                    # verify against the class definition).
                    cmd = list(self._cmd)
                    cmd.extend(['-encoding', encoding])

                    # Write the actual sentences to the temporary input file
                    _input = '\n'.join((' '.join(x) for x in sentences))
                    if isinstance(_input, compat.text_type) and encoding:
                        _input = _input.encode(encoding)
                    input_file.write(_input)

                # Run the tagger and get the output
                stanpos_output, _stderr = java(cmd, classpath=self._stanford_jar,
                                               stdout=PIPE, stderr=PIPE)
                stanpos_output = stanpos_output.decode(encoding)
            finally:
                # Delete the temporary file, whether or not the run succeeded
                os.unlink(self._input_file_path)
        finally:
            # Return java configurations to their default values
            config_java(options=default_options, verbose=False)

        return self.parse_output(stanpos_output, sentences)
Example 12
Project: razzy-spinner   Author: rafasashi   File: tgrep.py    GNU General Public License v3.0 5 votes vote down vote up
def tgrep_positions(pattern, trees, search_leaves=True):
    """
    Yield, for each tree, the list of tree positions matching the pattern.

    A pattern given as a string (text or bytes) is first compiled with
    tgrep_compile(); any other pattern is called directly on each node.
    A tree for which the search raises AttributeError yields an empty list.

    :param pattern: a tgrep search pattern
    :type pattern: str or output of tgrep_compile()
    :param trees: a sequence of NLTK trees (usually ParentedTrees)
    :type trees: iter(ParentedTree) or iter(Tree)
    :param search_leaves: whether to return matching leaf nodes
    :type search_leaves: bool
    :rtype: iter(tree positions)
    """

    if isinstance(pattern, (binary_type, text_type)):
        pattern = tgrep_compile(pattern)

    for tree in trees:
        try:
            candidates = (tree.treepositions() if search_leaves
                          else treepositions_no_leaves(tree))
            matched = []
            for position in candidates:
                if pattern(tree[position]):
                    matched.append(position)
            yield matched
        except AttributeError:
            yield []
Example 13
Project: razzy-spinner   Author: rafasashi   File: tgrep.py    GNU General Public License v3.0 5 votes vote down vote up
def tgrep_nodes(pattern, trees, search_leaves=True):
    """
    Yield, for each tree, the list of tree nodes matching the pattern.

    A pattern given as a string (text or bytes) is first compiled with
    tgrep_compile(); any other pattern is called directly on each node.
    A tree for which the search raises AttributeError yields an empty list.

    :param pattern: a tgrep search pattern
    :type pattern: str or output of tgrep_compile()
    :param trees: a sequence of NLTK trees (usually ParentedTrees)
    :type trees: iter(ParentedTree) or iter(Tree)
    :param search_leaves: whether to return matching leaf nodes
    :type search_leaves: bool
    :rtype: iter(tree nodes)
    """

    if isinstance(pattern, (binary_type, text_type)):
        pattern = tgrep_compile(pattern)

    for tree in trees:
        try:
            candidates = (tree.treepositions() if search_leaves
                          else treepositions_no_leaves(tree))
            matched = []
            for position in candidates:
                if pattern(tree[position]):
                    matched.append(tree[position])
            yield matched
        except AttributeError:
            yield []
Example 14
Project: razzy-spinner   Author: rafasashi   File: stanford.py    GNU General Public License v3.0 5 votes vote down vote up
def _execute(self, cmd, input_, verbose=False):
        """
        Run the Java command ``cmd`` on ``input_`` and return its output.

        ``input_`` is written to a temporary file whose name is appended to
        ``cmd``.  The temporary file is deleted and the Java options are
        restored even when the Java call or the decoding raises.

        :param cmd: argument list for ``java``; it is extended in place with
            the charset, the optional ``-options`` value and the file name
        :type cmd: list
        :param input_: the data to process; text is encoded with
            ``self._encoding`` when that encoding is set
        :param verbose: forwarded to ``config_java``
        :type verbose: bool
        :return: the standard output of the Java call, decoded with
            ``self._encoding``
        """
        encoding = self._encoding
        cmd.extend(['-charset', encoding])
        _options_cmd = self._options_cmd
        if _options_cmd:
            cmd.extend(['-options', self._options_cmd])

        default_options = ' '.join(_java_options)

        # Configure java.
        config_java(options=self.java_options, verbose=verbose)

        try:
            # Windows is incompatible with NamedTemporaryFile() without passing in delete=False.
            input_file = tempfile.NamedTemporaryFile(mode='wb', delete=False)
            try:
                with input_file:
                    # Write the actual sentences to the temporary input file
                    if isinstance(input_, compat.text_type) and encoding:
                        input_ = input_.encode(encoding)
                    input_file.write(input_)
                    input_file.flush()

                    cmd.append(input_file.name)

                    # Run the tagger and get the output.
                    stdout, _stderr = java(cmd, classpath=self._stanford_jar,
                                           stdout=PIPE, stderr=PIPE)
                    stdout = stdout.decode(encoding)
            finally:
                # The file is closed by now, so it can be removed on every platform.
                os.unlink(input_file.name)
        finally:
            # Return java configurations to their default values.
            config_java(options=default_options, verbose=False)

        return stdout
Example 15
Project: razzy-spinner   Author: rafasashi   File: downloader.py    GNU General Public License v3.0 5 votes vote down vote up
def fromxml(xml):
        """
        Build a Package from an XML element or from an XML source to parse.

        Every attribute value of the element is converted with
        ``compat.text_type`` (in place) and the attributes are passed to
        ``Package`` as keyword arguments.

        :param xml: an element exposing ``attrib``, or a string naming the
            XML source to parse
        :return: the Package built from the element's attributes
        """
        if isinstance(xml, compat.string_types):
            # parse() returns a tree wrapper that has no ``attrib``; the
            # attributes live on its root element.
            # NOTE(review): assumes ``ElementTree`` is xml.etree.ElementTree
            # or an API-compatible module - confirm against the imports.
            xml = ElementTree.parse(xml).getroot()
        for key in xml.attrib:
            xml.attrib[key] = compat.text_type(xml.attrib[key])
        return Package(**xml.attrib)
Example 16
Project: razzy-spinner   Author: rafasashi   File: downloader.py    GNU General Public License v3.0 5 votes vote down vote up
def fromxml(xml):
        """
        Build a Collection from an XML element or from an XML source to parse.

        Every attribute value of the element is converted with
        ``compat.text_type`` (in place); the ``ref`` attribute of each
        ``item`` child found by ``findall('item')`` becomes a member of
        ``children``.

        :param xml: an element exposing ``attrib`` and ``findall``, or a
            string naming the XML source to parse
        :return: the Collection built from the attributes and children
        """
        if isinstance(xml, compat.string_types):
            # parse() returns a tree wrapper that has no ``attrib``; the
            # attributes live on its root element.
            # NOTE(review): assumes ``ElementTree`` is xml.etree.ElementTree
            # or an API-compatible module - confirm against the imports.
            xml = ElementTree.parse(xml).getroot()
        for key in xml.attrib:
            xml.attrib[key] = compat.text_type(xml.attrib[key])
        children = [child.get('ref') for child in xml.findall('item')]
        return Collection(children=children, **xml.attrib)
Example 17
Project: razzy-spinner   Author: rafasashi   File: framenet.py    GNU General Public License v3.0 5 votes vote down vote up
def __repr__(self):
        """
        Return a list-like string representation of this corpus view.

        Elements are rendered with their ``_short_repr()``.  When
        ``_MAX_REPR_SIZE`` is set and the running length exceeds it after
        more than two elements, the output is cut short and ends in
        ``...``.  Elements are separated by ``',\\n '`` when
        ``_BREAK_LINES`` is true and by ``', '`` otherwise.
        """
        shown = []
        total = 5

        for elt in self:
            # Key difference from the inherited version: _short_repr() is used.
            short = elt._short_repr()
            shown.append(short)
            total += len(short) + 2
            if self._MAX_REPR_SIZE and total > self._MAX_REPR_SIZE and len(shown) > 2:
                separator = text_type(',\n ' if self._BREAK_LINES else ', ')
                return "[%s, ...]" % separator.join(shown[:-1])

        separator = text_type(',\n ' if self._BREAK_LINES else ', ')
        return "[%s]" % separator.join(shown)
Example 18
Project: razzy-spinner   Author: rafasashi   File: framenet.py    GNU General Public License v3.0 5 votes vote down vote up
def __repr__(self):
        """
        Return a list-like string representation of this corpus view.

        Elements are rendered with their ``_short_repr()`` and separated
        by ``', '``.  Once the running length exceeds ``_MAX_REPR_SIZE``
        after more than two elements, the output is cut short and ends in
        ``...``.
        """
        shown = []
        total = 5
        for elt in self:
            # Key difference from the inherited version: _short_repr() is used.
            short = elt._short_repr()
            shown.append(short)
            total += len(short) + 2
            if total > self._MAX_REPR_SIZE and len(shown) > 2:
                return "[%s, ...]" % text_type(', ').join(shown[:-1])
        # The loop never breaks, so this is reached whenever it runs to completion.
        return "[%s]" % text_type(', ').join(shown)
Example 19
Project: OpenBottle   Author: xiaozhuchacha   File: stanford.py    MIT License 5 votes vote down vote up
def tag_sents(self, sentences):
        """
        Tag several sentences with one run of the external Java tagger.

        The sentences are written to a temporary file (tokens joined by
        single spaces, sentences by newlines), the tagger is run through
        ``java`` and its decoded output is handed to ``self.parse_output``.
        The temporary file is removed and the Java options are restored
        even when writing the input or running the tagger raises.

        :param sentences: the sentences to tag, each an iterable of tokens
        :return: the result of ``self.parse_output`` for the tagger output
        """
        encoding = self._encoding
        default_options = ' '.join(_java_options)
        config_java(options=self.java_options, verbose=False)

        try:
            # Create a temporary input file
            _input_fh, self._input_file_path = tempfile.mkstemp(text=True)
            try:
                # Wrapping the descriptor immediately guarantees it gets closed.
                with os.fdopen(_input_fh, 'wb') as input_file:
                    # self._cmd is read only after _input_file_path is set,
                    # exactly as before (presumably it refers to that path;
                    # verify against the class definition).
                    cmd = list(self._cmd)
                    cmd.extend(['-encoding', encoding])

                    # Write the actual sentences to the temporary input file
                    _input = '\n'.join((' '.join(x) for x in sentences))
                    if isinstance(_input, compat.text_type) and encoding:
                        _input = _input.encode(encoding)
                    input_file.write(_input)

                # Run the tagger and get the output
                stanpos_output, _stderr = java(cmd, classpath=self._stanford_jar,
                                               stdout=PIPE, stderr=PIPE)
                stanpos_output = stanpos_output.decode(encoding)
            finally:
                # Delete the temporary file, whether or not the run succeeded
                os.unlink(self._input_file_path)
        finally:
            # Return java configurations to their default values
            config_java(options=default_options, verbose=False)

        return self.parse_output(stanpos_output, sentences)
Example 20
Project: OpenBottle   Author: xiaozhuchacha   File: tgrep.py    MIT License 5 votes vote down vote up
def tgrep_positions(pattern, trees, search_leaves=True):
    """
    Yield, for each tree, the list of tree positions matching the pattern.

    A pattern given as a string (text or bytes) is first compiled with
    tgrep_compile(); any other pattern is called directly on each node.
    A tree for which the search raises AttributeError yields an empty list.

    :param pattern: a tgrep search pattern
    :type pattern: str or output of tgrep_compile()
    :param trees: a sequence of NLTK trees (usually ParentedTrees)
    :type trees: iter(ParentedTree) or iter(Tree)
    :param search_leaves: whether to return matching leaf nodes
    :type search_leaves: bool
    :rtype: iter(tree positions)
    """

    if isinstance(pattern, (binary_type, text_type)):
        pattern = tgrep_compile(pattern)

    for tree in trees:
        try:
            candidates = (tree.treepositions() if search_leaves
                          else treepositions_no_leaves(tree))
            matched = []
            for position in candidates:
                if pattern(tree[position]):
                    matched.append(position)
            yield matched
        except AttributeError:
            yield []
Example 21
Project: OpenBottle   Author: xiaozhuchacha   File: tgrep.py    MIT License 5 votes vote down vote up
def tgrep_nodes(pattern, trees, search_leaves=True):
    """
    Yield, for each tree, the list of tree nodes matching the pattern.

    A pattern given as a string (text or bytes) is first compiled with
    tgrep_compile(); any other pattern is called directly on each node.
    A tree for which the search raises AttributeError yields an empty list.

    :param pattern: a tgrep search pattern
    :type pattern: str or output of tgrep_compile()
    :param trees: a sequence of NLTK trees (usually ParentedTrees)
    :type trees: iter(ParentedTree) or iter(Tree)
    :param search_leaves: whether to return matching leaf nodes
    :type search_leaves: bool
    :rtype: iter(tree nodes)
    """

    if isinstance(pattern, (binary_type, text_type)):
        pattern = tgrep_compile(pattern)

    for tree in trees:
        try:
            candidates = (tree.treepositions() if search_leaves
                          else treepositions_no_leaves(tree))
            matched = []
            for position in candidates:
                if pattern(tree[position]):
                    matched.append(tree[position])
            yield matched
        except AttributeError:
            yield []
Example 22
Project: OpenBottle   Author: xiaozhuchacha   File: stanford.py    MIT License 5 votes vote down vote up
def _execute(self, cmd, input_, verbose=False):
        """
        Run the Java command ``cmd`` on ``input_`` and return its output.

        ``input_`` is written to a temporary file whose name is appended to
        ``cmd``.  The temporary file is deleted and the Java options are
        restored even when the Java call or the decoding raises.

        :param cmd: argument list for ``java``; it is extended in place with
            the charset, the optional ``-options`` value and the file name
        :type cmd: list
        :param input_: the data to process; text is encoded with
            ``self._encoding`` when that encoding is set
        :param verbose: forwarded to ``config_java``
        :type verbose: bool
        :return: the standard output of the Java call, decoded with
            ``self._encoding``
        """
        encoding = self._encoding
        cmd.extend(['-charset', encoding])
        _options_cmd = self._options_cmd
        if _options_cmd:
            cmd.extend(['-options', self._options_cmd])

        default_options = ' '.join(_java_options)

        # Configure java.
        config_java(options=self.java_options, verbose=verbose)

        try:
            # Windows is incompatible with NamedTemporaryFile() without passing in delete=False.
            input_file = tempfile.NamedTemporaryFile(mode='wb', delete=False)
            try:
                with input_file:
                    # Write the actual sentences to the temporary input file
                    if isinstance(input_, compat.text_type) and encoding:
                        input_ = input_.encode(encoding)
                    input_file.write(input_)
                    input_file.flush()

                    cmd.append(input_file.name)

                    # Run the tagger and get the output.
                    stdout, _stderr = java(cmd, classpath=self._stanford_jar,
                                           stdout=PIPE, stderr=PIPE)
                    stdout = stdout.decode(encoding)
            finally:
                # The file is closed by now, so it can be removed on every platform.
                os.unlink(input_file.name)
        finally:
            # Return java configurations to their default values.
            config_java(options=default_options, verbose=False)

        return stdout
Example 23
Project: OpenBottle   Author: xiaozhuchacha   File: stanford_segmenter.py    MIT License 5 votes vote down vote up
def segment_sents(self, sentences):
        """
        Segment several sentences with one run of the CRF classifier.

        The sentences are written to a temporary file (tokens joined by
        single spaces, sentences by newlines) whose path is passed to the
        classifier through ``-textFile``.  The temporary file is removed
        even when writing the input or running ``self._execute`` raises.

        :param sentences: the sentences to segment, each an iterable of
            strings
        :return: whatever ``self._execute`` returns for the built command
        """
        encoding = self._encoding
        # Create a temporary input file
        _input_fh, self._input_file_path = tempfile.mkstemp(text=True)

        try:
            # Write the actual sentences to the temporary input file; the
            # ``with`` block closes the descriptor even if writing fails.
            with os.fdopen(_input_fh, 'wb') as input_file:
                _input = '\n'.join((' '.join(x) for x in sentences))
                if isinstance(_input, compat.text_type) and encoding:
                    _input = _input.encode(encoding)
                input_file.write(_input)

            cmd = [
                'edu.stanford.nlp.ie.crf.CRFClassifier',
                '-sighanCorporaDict', self._sihan_corpora_dict,
                '-textFile', self._input_file_path,
                '-sighanPostProcessing', 'true',
                '-keepAllWhitespaces', 'false',
                '-loadClassifier', self._model,
                '-serDictionary', self._dict
            ]

            stdout = self._execute(cmd)
        finally:
            # Delete the temporary file, whether or not the run succeeded
            os.unlink(self._input_file_path)

        return stdout
Example 24
Project: OpenBottle   Author: xiaozhuchacha   File: downloader.py    MIT License 5 votes vote down vote up
def fromxml(xml):
        """
        Build a Package from an XML element or from an XML source to parse.

        Every attribute value of the element is converted with
        ``compat.text_type`` (in place) and the attributes are passed to
        ``Package`` as keyword arguments.

        :param xml: an element exposing ``attrib``, or a string naming the
            XML source to parse
        :return: the Package built from the element's attributes
        """
        if isinstance(xml, compat.string_types):
            # parse() returns a tree wrapper that has no ``attrib``; the
            # attributes live on its root element.
            # NOTE(review): assumes ``ElementTree`` is xml.etree.ElementTree
            # or an API-compatible module - confirm against the imports.
            xml = ElementTree.parse(xml).getroot()
        for key in xml.attrib:
            xml.attrib[key] = compat.text_type(xml.attrib[key])
        return Package(**xml.attrib)
Example 25
Project: OpenBottle   Author: xiaozhuchacha   File: stanford.py    MIT License 5 votes vote down vote up
def _execute(self, cmd, input_, verbose=False):
        """
        Run the Java command ``cmd`` on ``input_`` and return its output.

        ``input_`` is written to a temporary file that is either fed to the
        Java process on standard input (when ``self._USE_STDIN`` is true)
        or passed by name as the last command argument.  The temporary file
        is deleted and the Java options are restored even when the Java
        call or the decoding raises.

        :param cmd: argument list for ``java``; it is extended in place
        :type cmd: list
        :param input_: the data to process; text is encoded with
            ``self._encoding`` when that encoding is set
        :param verbose: forwarded to ``config_java``
        :type verbose: bool
        :return: the standard output of the Java call, decoded with
            ``self._encoding`` after non-breaking-space bytes were replaced
            by plain spaces
        """
        encoding = self._encoding
        cmd.extend(['-encoding', encoding])
        if self.corenlp_options:
            cmd.append(self.corenlp_options)

        default_options = ' '.join(_java_options)

        # Configure java.
        config_java(options=self.java_options, verbose=verbose)

        try:
            # Windows is incompatible with NamedTemporaryFile() without passing in delete=False.
            input_file = tempfile.NamedTemporaryFile(mode='wb', delete=False)
            try:
                with input_file:
                    # Write the actual sentences to the temporary input file
                    if isinstance(input_, compat.text_type) and encoding:
                        input_ = input_.encode(encoding)
                    input_file.write(input_)
                    input_file.flush()

                    # Run the tagger and get the output.
                    if self._USE_STDIN:
                        # Rewind so the child process reads from the start.
                        input_file.seek(0)
                        stdout, _stderr = java(cmd, classpath=self._classpath,
                                               stdin=input_file, stdout=PIPE, stderr=PIPE)
                    else:
                        cmd.append(input_file.name)
                        stdout, _stderr = java(cmd, classpath=self._classpath,
                                               stdout=PIPE, stderr=PIPE)

                    # NOTE(review): the second replacement works on raw bytes,
                    # so in a multi-byte encoding it also rewrites any 0xA0
                    # byte that is part of another character; kept as is to
                    # preserve the existing output - confirm this is intended.
                    stdout = stdout.replace(b'\xc2\xa0', b' ')
                    stdout = stdout.replace(b'\xa0', b' ')
                    stdout = stdout.decode(encoding)
            finally:
                # The file is closed by now, so it can be removed on every platform.
                os.unlink(input_file.name)
        finally:
            # Return java configurations to their default values.
            config_java(options=default_options, verbose=False)

        return stdout
Example 26
Project: OpenBottle   Author: xiaozhuchacha   File: framenet.py    MIT License 5 votes vote down vote up
def __repr__(self):
        """
        Return a list-like string representation of this corpus view.

        Elements are rendered with their ``_short_repr()``.  When
        ``_MAX_REPR_SIZE`` is set and the running length exceeds it after
        more than two elements, the output is cut short and ends in
        ``...``.  Elements are separated by ``',\\n '`` when
        ``_BREAK_LINES`` is true and by ``', '`` otherwise.
        """
        shown = []
        total = 5

        for elt in self:
            # Key difference from the inherited version: _short_repr() is used.
            short = elt._short_repr()
            shown.append(short)
            total += len(short) + 2
            if self._MAX_REPR_SIZE and total > self._MAX_REPR_SIZE and len(shown) > 2:
                separator = text_type(',\n ' if self._BREAK_LINES else ', ')
                return "[%s, ...]" % separator.join(shown[:-1])

        separator = text_type(',\n ' if self._BREAK_LINES else ', ')
        return "[%s]" % separator.join(shown)
Example 27
Project: OpenBottle   Author: xiaozhuchacha   File: framenet.py    MIT License 5 votes vote down vote up
def __repr__(self):
        """
        Return a list-like string representation of this corpus view.

        Elements are rendered with their ``_short_repr()`` and separated
        by ``', '``.  Once the running length exceeds ``_MAX_REPR_SIZE``
        after more than two elements, the output is cut short and ends in
        ``...``.
        """
        shown = []
        total = 5
        for elt in self:
            # Key difference from the inherited version: _short_repr() is used.
            short = elt._short_repr()
            shown.append(short)
            total += len(short) + 2
            if total > self._MAX_REPR_SIZE and len(shown) > 2:
                return "[%s, ...]" % text_type(', ').join(shown[:-1])
        # The loop never breaks, so this is reached whenever it runs to completion.
        return "[%s]" % text_type(', ').join(shown)
Example 28
Project: OpenBottle   Author: xiaozhuchacha   File: framenet.py    MIT License 5 votes vote down vote up
def __repr__(self):
        """
        Return a list-like string representation of this corpus view.

        Elements are rendered with their ``_short_repr()`` and separated
        by ``', '``.  Once the running length exceeds ``_MAX_REPR_SIZE``
        after more than two elements, the output is cut short and ends in
        ``...``.
        """
        shown = []
        total = 5
        for elt in self:
            # Key difference from the inherited version: _short_repr() is used.
            short = elt._short_repr()
            shown.append(short)
            total += len(short) + 2
            if total > self._MAX_REPR_SIZE and len(shown) > 2:
                return "[%s, ...]" % text_type(', ').join(shown[:-1])
        # The loop never breaks, so this is reached whenever it runs to completion.
        return "[%s]" % text_type(', ').join(shown)
Example 29
Project: OpenBottle   Author: xiaozhuchacha   File: framenet.py    MIT License 5 votes vote down vote up
def __repr__(self):
        """
        Return a list-like string representation of this corpus view.

        Elements are rendered with their ``_short_repr()`` and separated
        by ``', '``.  Once the running length exceeds ``_MAX_REPR_SIZE``
        after more than two elements, the output is cut short and ends in
        ``...``.
        """
        shown = []
        total = 5
        for elt in self:
            # Key difference from the inherited version: _short_repr() is used.
            short = elt._short_repr()
            shown.append(short)
            total += len(short) + 2
            if total > self._MAX_REPR_SIZE and len(shown) > 2:
                return "[%s, ...]" % text_type(', ').join(shown[:-1])
        # The loop never breaks, so this is reached whenever it runs to completion.
        return "[%s]" % text_type(', ').join(shown)
Example 30
Project: OpenBottle   Author: xiaozhuchacha   File: stanford.py    MIT License 5 votes vote down vote up
def tag_sents(self, sentences):
        """
        Tag several sentences with one run of the external Java tagger.

        The sentences are written to a temporary file (tokens joined by
        single spaces, sentences by newlines), the tagger is run through
        ``java`` and its decoded output is handed to ``self.parse_output``.
        The temporary file is removed and the Java options are restored
        even when writing the input or running the tagger raises.

        :param sentences: the sentences to tag, each an iterable of tokens
        :return: the result of ``self.parse_output`` for the tagger output
        """
        encoding = self._encoding
        default_options = ' '.join(_java_options)
        config_java(options=self.java_options, verbose=False)

        try:
            # Create a temporary input file
            _input_fh, self._input_file_path = tempfile.mkstemp(text=True)
            try:
                # Wrapping the descriptor immediately guarantees it gets closed.
                with os.fdopen(_input_fh, 'wb') as input_file:
                    # self._cmd is read only after _input_file_path is set,
                    # exactly as before (presumably it refers to that path;
                    # verify against the class definition).
                    cmd = list(self._cmd)
                    cmd.extend(['-encoding', encoding])

                    # Write the actual sentences to the temporary input file
                    _input = '\n'.join((' '.join(x) for x in sentences))
                    if isinstance(_input, compat.text_type) and encoding:
                        _input = _input.encode(encoding)
                    input_file.write(_input)

                # Run the tagger and get the output
                stanpos_output, _stderr = java(cmd, classpath=self._stanford_jar,
                                               stdout=PIPE, stderr=PIPE)
                stanpos_output = stanpos_output.decode(encoding)
            finally:
                # Delete the temporary file, whether or not the run succeeded
                os.unlink(self._input_file_path)
        finally:
            # Return java configurations to their default values
            config_java(options=default_options, verbose=False)

        return self.parse_output(stanpos_output, sentences)
Example 31
Project: OpenBottle   Author: xiaozhuchacha   File: tgrep.py    MIT License 5 votes vote down vote up
def _tgrep_node_literal_value(node):
    '''
    Return the string used to compare a parse tree node against the
    tgrep node literal predicates: the node's label when ``_istree``
    accepts it, otherwise the node converted with ``text_type``.
    '''
    if _istree(node):
        return node.label()
    return text_type(node)
Example 32
Project: OpenBottle   Author: xiaozhuchacha   File: tgrep.py    MIT License 5 votes vote down vote up
def tgrep_nodes(pattern, trees, search_leaves=True):
    """
    Yield, for each tree, the list of tree nodes matching the pattern.

    A pattern given as a string (text or bytes) is first compiled with
    tgrep_compile(); any other pattern is called directly on each node.
    A tree for which the search raises AttributeError yields an empty list.

    :param pattern: a tgrep search pattern
    :type pattern: str or output of tgrep_compile()
    :param trees: a sequence of NLTK trees (usually ParentedTrees)
    :type trees: iter(ParentedTree) or iter(Tree)
    :param search_leaves: whether to return matching leaf nodes
    :type search_leaves: bool
    :rtype: iter(tree nodes)
    """

    if isinstance(pattern, (binary_type, text_type)):
        pattern = tgrep_compile(pattern)

    for tree in trees:
        try:
            candidates = (tree.treepositions() if search_leaves
                          else treepositions_no_leaves(tree))
            matched = []
            for position in candidates:
                if pattern(tree[position]):
                    matched.append(tree[position])
            yield matched
        except AttributeError:
            yield []
Example 33
Project: OpenBottle   Author: xiaozhuchacha   File: stanford.py    MIT License 5 votes vote down vote up
def _execute(self, cmd, input_, verbose=False):
        """
        Run a java command on ``input_`` and return its decoded stdout.

        ``input_`` is written to a temporary file whose path is appended to
        ``cmd``.  The temporary file is deleted and the java options are
        reset to their previous defaults even when the java call or the
        decoding of its output raises.

        :param cmd: java command line; extended in place with ``-charset``,
            the configured ``-options`` value (if any) and the temporary
            file path
        :type cmd: list(str)
        :param input_: text, or already encoded bytes, to feed to the tool
        :param verbose: forwarded to ``config_java`` when applying
            ``self.java_options``
        :type verbose: bool
        :return: stdout of the java process decoded with ``self._encoding``
        """
        encoding = self._encoding
        cmd.extend(['-charset', encoding])
        _options_cmd = self._options_cmd
        if _options_cmd:
            cmd.extend(['-options', _options_cmd])

        default_options = ' '.join(_java_options)

        # Configure java.
        config_java(options=self.java_options, verbose=verbose)

        try:
            # Windows is incompatible with NamedTemporaryFile() without passing in delete=False.
            input_file = tempfile.NamedTemporaryFile(mode='wb', delete=False)
            try:
                with input_file:
                    # Write the actual sentences to the temporary input file
                    if isinstance(input_, compat.text_type) and encoding:
                        input_ = input_.encode(encoding)
                    input_file.write(input_)
                    input_file.flush()

                    cmd.append(input_file.name)

                    # Run the tagger and get the output.
                    stdout, stderr = java(cmd, classpath=self._stanford_jar,
                                          stdout=PIPE, stderr=PIPE)
                    stdout = stdout.decode(encoding)
            finally:
                # The file was created with delete=False, so remove it here,
                # after the handle has been closed.
                os.unlink(input_file.name)
        finally:
            # Return java configurations to their default values.
            config_java(options=default_options, verbose=False)

        return stdout
Example 34
Project: OpenBottle   Author: xiaozhuchacha   File: stanford_segmenter.py    MIT License 5 votes vote down vote up
def segment_sents(self, sentences):
        """
        Segment the given sentences by running the CRFClassifier command.

        The sentences are written to a temporary file, one sentence per
        line with its items joined by single spaces, and that file is
        passed to the command through ``-textFile``.  The temporary file is
        removed even if writing it or running the command fails.

        :param sentences: iterable of sentences, each an iterable of str
        :return: the value returned by ``self._execute`` for the command
        """
        encoding = self._encoding
        # Create a temporary input file
        _input_fh, self._input_file_path = tempfile.mkstemp(text=True)

        try:
            # Write the actual sentences to the temporary input file; the
            # ``with`` block closes the handle before the file is used.
            with os.fdopen(_input_fh, 'wb') as input_file:
                _input = '\n'.join((' '.join(x) for x in sentences))
                if isinstance(_input, compat.text_type) and encoding:
                    _input = _input.encode(encoding)
                input_file.write(_input)

            cmd = [
                'edu.stanford.nlp.ie.crf.CRFClassifier',
                '-sighanCorporaDict', self._sihan_corpora_dict,
                '-textFile', self._input_file_path,
                '-sighanPostProcessing', 'true',
                '-keepAllWhitespaces', 'false',
                '-loadClassifier', self._model,
                '-serDictionary', self._dict
            ]

            stdout = self._execute(cmd)
        finally:
            # Delete the temporary file
            os.unlink(self._input_file_path)

        return stdout
Example 35
Project: OpenBottle   Author: xiaozhuchacha   File: downloader.py    MIT License 5 votes vote down vote up
def fromxml(xml):
        """
        Build a ``Package`` from an XML element or from an XML source name.

        :param xml: an element exposing ``attrib``, or a string that is
            handed to ``ElementTree.parse``
        :return: a ``Package`` created from the element's attributes, each
            converted with ``compat.text_type``
        """
        if isinstance(xml, compat.string_types):
            # parse() returns an ElementTree wrapper, which has no ``attrib``;
            # the attributes live on its root element.
            # NOTE(review): assumes ``ElementTree`` is xml.etree.ElementTree.
            xml = ElementTree.parse(xml).getroot()
        for key in xml.attrib:
            xml.attrib[key] = compat.text_type(xml.attrib[key])
        return Package(**xml.attrib)
Example 36
Project: OpenBottle   Author: xiaozhuchacha   File: downloader.py    MIT License 5 votes vote down vote up
def fromxml(xml):
        """
        Build a ``Collection`` from an XML element or from an XML source name.

        :param xml: an element exposing ``attrib`` and ``findall``, or a
            string that is handed to ``ElementTree.parse``
        :return: a ``Collection`` whose ``children`` are the ``ref`` values
            of the element's ``item`` children and whose other arguments
            are the element's attributes converted with ``compat.text_type``
        """
        if isinstance(xml, compat.string_types):
            # parse() returns an ElementTree wrapper, which has no ``attrib``;
            # the attributes live on its root element.
            # NOTE(review): assumes ``ElementTree`` is xml.etree.ElementTree.
            xml = ElementTree.parse(xml).getroot()
        for key in xml.attrib:
            xml.attrib[key] = compat.text_type(xml.attrib[key])
        children = [child.get('ref') for child in xml.findall('item')]
        return Collection(children=children, **xml.attrib)
Example 37
Project: OpenBottle   Author: xiaozhuchacha   File: framenet.py    MIT License 5 votes vote down vote up
def __repr__(self):
        """
        Return a list-like representation of this corpus view built from
        each element's ``_short_repr()``.  When ``_MAX_REPR_SIZE`` is set
        and the running length exceeds it (with more than two pieces
        collected), the output is cut short and ends in ``...``.
        """
        shown = []
        running = 5
        truncated = False

        for elt in self:
            # Unlike the inherited version, elements supply _short_repr().
            shown.append(elt._short_repr())
            running += len(shown[-1]) + 2
            if self._MAX_REPR_SIZE and running > self._MAX_REPR_SIZE and len(shown) > 2:
                # The piece that crossed the limit is not displayed.
                shown.pop()
                truncated = True
                break

        joiner = text_type(',\n ' if self._BREAK_LINES else ', ')
        if truncated:
            return "[%s, ...]" % joiner.join(shown)
        return "[%s]" % joiner.join(shown)
Example 38
Project: OpenBottle   Author: xiaozhuchacha   File: framenet.py    MIT License 5 votes vote down vote up
def __repr__(self):
        """
        Return a list-like representation of this corpus view built from
        each element's ``_short_repr()``; once the running length exceeds
        ``_MAX_REPR_SIZE`` (with more than two pieces collected) the
        output is cut short and ends in ``...``.
        """
        shown = []
        running = 5
        for elt in self:
            # Unlike the inherited version, elements supply _short_repr().
            shown.append(elt._short_repr())
            running += len(shown[-1]) + 2
            if running > self._MAX_REPR_SIZE and len(shown) > 2:
                # The piece that crossed the limit is not displayed.
                return "[%s, ...]" % text_type(', ').join(shown[:-1])
        # Reached only when the loop ran to completion without truncating.
        return "[%s]" % text_type(', ').join(shown)
Example 39
Project: OpenBottle   Author: xiaozhuchacha   File: framenet.py    MIT License 5 votes vote down vote up
def __repr__(self):
        """
        Represent this corpus view like a list of its elements' short
        reprs, truncated with ``...`` when the accumulated length passes
        ``_MAX_REPR_SIZE`` and more than two pieces were gathered.
        """
        parts = []
        size = 5
        overflow = False
        for elt in self:
            # Each element contributes its _short_repr(), not its repr().
            parts.append(elt._short_repr())
            size += len(parts[-1]) + 2
            if size > self._MAX_REPR_SIZE and len(parts) > 2:
                overflow = True
                break
        if overflow:
            # Drop the piece that pushed the length over the limit.
            return "[%s, ...]" % text_type(', ').join(parts[:-1])
        return "[%s]" % text_type(', ').join(parts)
Example 40
Project: OpenBottle   Author: xiaozhuchacha   File: framenet.py    MIT License 5 votes vote down vote up
def __repr__(self):
        """
        Give a list-style representation of this corpus view using the
        ``_short_repr()`` of every element.  The result is shortened and
        terminated with ``...`` as soon as its running length exceeds
        ``_MAX_REPR_SIZE`` while holding more than two pieces.
        """
        collected = []
        total = 5
        for elt in self:
            # Elements are rendered through _short_repr() here.
            collected.append(elt._short_repr())
            total += len(collected[-1]) + 2
            if total > self._MAX_REPR_SIZE and len(collected) > 2:
                visible = collected[:-1]
                return "[%s, ...]" % text_type(', ').join(visible)
        return "[%s]" % text_type(', ').join(collected)
Example 41
Project: SRLTagger   Author: jawahar273   File: srlnltk.py    Apache License 2.0 5 votes vote down vote up
def tag_sents_yield(self, sentences, index_s):
        """
        Run Senna on the sentences and yield its output in chunks.

        The decoded output is split on whitespace; the chunk size is the
        position of the first occurrence of ``index_s`` in that token list.
        Each yielded item is ``(chunk_size, tokens_of_that_chunk)``.

        :param sentences: iterable of sentences, each an iterable of str
        :param index_s: token whose first position determines the chunk size
        :raises OSError: if the Senna executable is not a file
        :raises RuntimeError: if Senna exits with a non-zero return code
        """
        encoding = self._encoding

        if not path.isfile(self.executable(self._path)):
            raise OSError("Senna executable expected at %s but not found" % self.executable(self._path))

        # Assemble the Senna command line, one flag per requested operation.
        senna_cmd = [self.executable(self._path), '-path', self._path, '-usrtokens', '-iobtags']
        senna_cmd += ['-' + op for op in self.operations]

        # One sentence per line, tokens separated by single spaces.
        payload = '\n'.join((' '.join(x) for x in sentences)) + '\n'
        if isinstance(payload, text_type) and encoding:
            payload = payload.encode(encoding)

        # Feed the payload through stdin and collect both output streams.
        p = Popen(senna_cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE)
        (stdout, stderr) = p.communicate(input=payload)
        if p.returncode != 0:
            raise RuntimeError('Senna command failed! Details: %s' % stderr)

        tokens = stdout.decode(encoding).split()
        index = tokens.index(index_s)
        for start in range(0, len(tokens), index):
            yield (index, tokens[start:start + index])
Example 42
Project: SRLTagger   Author: jawahar273   File: srlnltk.py    Apache License 2.0 5 votes vote down vote up
def tag_sents(self, sentences, index_s):
        """
        Run Senna on the sentences and return its output in chunks.

        The decoded output is split on whitespace; the chunk size is the
        position of the first occurrence of ``index_s`` in that token list.

        :param sentences: iterable of sentences, each an iterable of str
        :param index_s: token whose first position determines the chunk size
        :return: list of ``(chunk_size, tokens_of_that_chunk)`` tuples
        :raises OSError: if the Senna executable is not a file
        :raises RuntimeError: if Senna exits with a non-zero return code
        """
        encoding = self._encoding

        if not path.isfile(self.executable(self._path)):
            raise OSError("Senna executable expected at %s but not found" % self.executable(self._path))

        # Assemble the Senna command line, one flag per requested operation.
        senna_cmd = [self.executable(self._path), '-path', self._path, '-usrtokens', '-iobtags']
        senna_cmd += ['-' + op for op in self.operations]

        # One sentence per line, tokens separated by single spaces.
        payload = '\n'.join((' '.join(x) for x in sentences)) + '\n'
        if isinstance(payload, text_type) and encoding:
            payload = payload.encode(encoding)

        # Feed the payload through stdin and collect both output streams.
        p = Popen(senna_cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE)
        (stdout, stderr) = p.communicate(input=payload)
        if p.returncode != 0:
            raise RuntimeError('Senna command failed! Details: %s' % stderr)

        tokens = stdout.decode(encoding).split()
        # The first position of index_s is used as the stride between chunks.
        index = tokens.index(index_s)
        return [(index, tokens[start:start + index])
                for start in range(0, len(tokens), index)]
Example 43
Project: SRLTagger   Author: jawahar273   File: srlnltk.py    Apache License 2.0 5 votes vote down vote up
def tag_sents2file(self, sentences, file_name, file_mode):
        '''
        Run Senna on the sentences and write its standard output to a file.

        The output file is ``str(file_name) + '.txt'`` opened with
        ``file_mode``; it is closed once Senna has finished, whether or not
        the run succeeded.

        :param sentences: iterable of sentences, each an iterable of str
        :param file_name: output path without the ``.txt`` suffix
        :param file_mode: mode string passed to ``open``
        :return: 0 on success (the return code of the Senna process)
        :raises OSError: if the Senna executable is not a file
        :raises RuntimeError: if Senna exits with a non-zero return code
        '''
        encoding = self._encoding

        if not path.isfile(self.executable(self._path)):
            raise OSError("Senna executable expected at %s but not found" % self.executable(self._path))

        # Build the senna command to run the tagger
        _senna_cmd = [self.executable(self._path), '-path', self._path, '-usrtokens', '-iobtags']
        _senna_cmd.extend(['-'+op for op in self.operations])

        # Serialize the actual sentences to a temporary string
        _input = '\n'.join((' '.join(x) for x in sentences))+'\n'
        if isinstance(_input, text_type) and encoding:
            _input = _input.encode(encoding)

        # Run the tagger with stdout redirected to the output file; the
        # ``with`` block guarantees the handle is closed afterwards.
        with open(str(file_name)+'.txt', file_mode) as output_file:
            p = Popen(_senna_cmd, stdin=PIPE, stdout=output_file, stderr=PIPE)
            # stdout goes to the file, so only stderr is captured here.
            (_stdout, stderr) = p.communicate(input=_input)

        # Check the return code.
        if p.returncode != 0:
            raise RuntimeError('Senna command failed! Details: %s' % stderr)
        return p.returncode
Example 44
Project: robot_sds   Author: raybrshen   File: StanfordTagger_Revised.py    Apache License 2.0 5 votes vote down vote up
def tag_sents(self, sentences):
        """
        Tag the given sentences by running the java tagger command.

        The sentences are written to a temporary file, one per line with
        items joined by single spaces.  The temporary file is deleted and
        the java options are restored even if writing the input, running
        java or decoding its output raises.

        :param sentences: iterable of sentences, each an iterable of str
        :return: the result of ``self.parse_output`` applied to the decoded
            java output and ``sentences``
        :raises KeyError: if the ``CLASSPATH`` environment variable is unset
        """
        encoding = self._encoding
        default_options = ' '.join(_java_options)
        config_java(options=self.java_options, verbose=False)

        try:
            # Create a temporary input file
            _input_fh, self._input_file_path = tempfile.mkstemp(text=True)
            try:
                cmd = list(self._cmd)
                cmd.extend(['-encoding', encoding])

                # Write the actual sentences to the temporary input file
                with os.fdopen(_input_fh, 'wb') as input_file:
                    _input = '\n'.join((' '.join(x) for x in sentences))
                    if isinstance(_input, compat.text_type) and encoding:
                        _input = _input.encode(encoding)
                    input_file.write(_input)

                # Run the tagger and get the output.  This variant takes the
                # classpath from the CLASSPATH environment variable rather
                # than from self._stanford_jar.
                stanpos_output, _stderr = java(cmd, classpath=os.environ['CLASSPATH'], stdout=PIPE, stderr=PIPE)
                stanpos_output = stanpos_output.decode(encoding)
            finally:
                # Delete the temporary file
                os.unlink(self._input_file_path)
        finally:
            # Return java configurations to their default values
            config_java(options=default_options, verbose=False)

        return self.parse_output(stanpos_output, sentences)
Example 45
Project: seq2seq-keyphrase   Author: memray   File: stanford-pos-tagger.py    MIT License 5 votes vote down vote up
def tag_sents(self, sentences):
    """
    Tag the given sentences by running the java tagger command.

    The sentences are written to a temporary file, one per line with
    items joined by single spaces.  The temporary file is deleted and the
    java options are restored even if writing the input, running java or
    decoding its output raises.

    :param sentences: iterable of sentences, each an iterable of str
    :return: the result of ``self.parse_output`` applied to the decoded
        java output and ``sentences``
    """
    encoding = self._encoding
    default_options = ' '.join(_java_options)
    config_java(options=self.java_options, verbose=False)

    try:
        # Create a temporary input file
        _input_fh, self._input_file_path = tempfile.mkstemp(text=True)
        try:
            cmd = list(self._cmd)
            cmd.extend(['-encoding', encoding])

            # Write the actual sentences to the temporary input file
            with os.fdopen(_input_fh, 'wb') as input_file:
                _input = '\n'.join((' '.join(x) for x in sentences))
                if isinstance(_input, compat.text_type) and encoding:
                    _input = _input.encode(encoding)
                input_file.write(_input)

            # Run the tagger and get the output
            stanpos_output, _stderr = java(cmd, classpath=self._stanford_jar,
                                           stdout=PIPE, stderr=PIPE)
            stanpos_output = stanpos_output.decode(encoding)
        finally:
            # Delete the temporary file
            os.unlink(self._input_file_path)
    finally:
        # Return java configurations to their default values
        config_java(options=default_options, verbose=False)

    return self.parse_output(stanpos_output, sentences)
Example 46
Project: FancyWord   Author: EastonLee   File: stanford.py    GNU General Public License v3.0 5 votes vote down vote up
def tag_sents(self, sentences):
        """
        Tag the given sentences by running the java tagger command.

        The sentences are written to a temporary file, one per line with
        items joined by single spaces.  The ``-encoding`` option is added
        to a per-call copy of ``self._cmd`` so that repeated calls neither
        accumulate the option on the instance nor lose it.  The temporary
        file is deleted and the java options are restored even when a step
        raises.

        :param sentences: iterable of sentences, each an iterable of str
        :return: the result of ``self.parse_output`` applied to the decoded
            java output and ``sentences``
        """
        encoding = self._encoding
        default_options = ' '.join(_java_options)
        config_java(options=self.java_options, verbose=False)

        try:
            # Create a temporary input file
            _input_fh, self._input_file_path = tempfile.mkstemp(text=True)
            try:
                # Work on a copy: extending self._cmd directly would either
                # grow it on every call or, if _cmd is computed on access,
                # modify a throwaway list.
                cmd = list(self._cmd)
                cmd.extend(['-encoding', encoding])

                # Write the actual sentences to the temporary input file
                with os.fdopen(_input_fh, 'wb') as input_file:
                    _input = '\n'.join((' '.join(x) for x in sentences))
                    if isinstance(_input, compat.text_type) and encoding:
                        _input = _input.encode(encoding)
                    input_file.write(_input)

                # Run the tagger and get the output
                stanpos_output, _stderr = java(cmd, classpath=self._stanford_jar,
                                               stdout=PIPE, stderr=PIPE)
                stanpos_output = stanpos_output.decode(encoding)
            finally:
                # Delete the temporary file
                os.unlink(self._input_file_path)
        finally:
            # Return java configurations to their default values
            config_java(options=default_options, verbose=False)

        return self.parse_output(stanpos_output, sentences)
Example 47
Project: FancyWord   Author: EastonLee   File: tgrep.py    GNU General Public License v3.0 5 votes vote down vote up
def tgrep_positions(pattern, trees, search_leaves=True):
    """
    Yield, for every tree, the list of its positions that match the pattern.

    :param pattern: a tgrep search pattern
    :type pattern: str or output of tgrep_compile()
    :param trees: a sequence of NLTK trees (usually ParentedTrees)
    :type trees: iter(ParentedTree) or iter(Tree)
    :param search_leaves: whether to return matching leaf nodes
    :type search_leaves: bool
    :rtype: iter(tree positions)
    """
    # A raw (binary or text) pattern is compiled into a predicate first.
    if isinstance(pattern, (binary_type, text_type)):
        pattern = tgrep_compile(pattern)

    for tree in trees:
        try:
            candidates = (tree.treepositions() if search_leaves
                          else treepositions_no_leaves(tree))
            yield [pos for pos in candidates if pattern(tree[pos])]
        except AttributeError:
            # An AttributeError while examining this tree produces an
            # empty match list for it instead of aborting the iteration.
            yield []
Example 48
Project: FancyWord   Author: EastonLee   File: tgrep.py    GNU General Public License v3.0 5 votes vote down vote up
def tgrep_nodes(pattern, trees, search_leaves=True):
    """
    Yield, for every tree, the list of its nodes that match the pattern.

    :param pattern: a tgrep search pattern
    :type pattern: str or output of tgrep_compile()
    :param trees: a sequence of NLTK trees (usually ParentedTrees)
    :type trees: iter(ParentedTree) or iter(Tree)
    :param search_leaves: whether to return matching leaf nodes
    :type search_leaves: bool
    :rtype: iter(tree nodes)
    """
    # String patterns (binary or text) must be compiled before use.
    if isinstance(pattern, (binary_type, text_type)):
        pattern = tgrep_compile(pattern)

    for tree in trees:
        try:
            where = (tree.treepositions() if search_leaves
                     else treepositions_no_leaves(tree))
            yield [tree[spot] for spot in where if pattern(tree[spot])]
        except AttributeError:
            # An AttributeError raised while inspecting this tree yields
            # an empty list for it.
            yield []


# run module doctests 
Example 49
Project: FancyWord   Author: EastonLee   File: stanford.py    GNU General Public License v3.0 5 votes vote down vote up
def _execute(self, cmd, input_, verbose=False):
        """
        Run a java command on ``input_`` and return its decoded stdout.

        ``input_`` is written to a temporary file whose path is appended to
        ``cmd``.  Cleanup (deleting the temporary file, restoring the java
        options) happens even when the java call or the decoding raises.

        :param cmd: java command line; extended in place with ``-charset``,
            the configured ``-options`` value (if any) and the temporary
            file path
        :type cmd: list(str)
        :param input_: text, or already encoded bytes, to feed to the tool
        :param verbose: forwarded to ``config_java`` when applying
            ``self.java_options``
        :type verbose: bool
        :return: stdout of the java process decoded with ``self._encoding``
        """
        encoding = self._encoding
        cmd.extend(['-charset', encoding])
        _options_cmd = self._options_cmd
        if _options_cmd:
            cmd.extend(['-options', _options_cmd])

        default_options = ' '.join(_java_options)

        # Configure java.
        config_java(options=self.java_options, verbose=verbose)

        try:
            # Windows is incompatible with NamedTemporaryFile() without passing in delete=False.
            input_file = tempfile.NamedTemporaryFile(mode='wb', delete=False)
            try:
                with input_file:
                    # Write the actual sentences to the temporary input file
                    if isinstance(input_, compat.text_type) and encoding:
                        input_ = input_.encode(encoding)
                    input_file.write(input_)
                    input_file.flush()

                    cmd.append(input_file.name)

                    # Run the tagger and get the output.
                    stdout, stderr = java(cmd, classpath=self._stanford_jar,
                                          stdout=PIPE, stderr=PIPE)
                    stdout = stdout.decode(encoding)
            finally:
                # delete=False leaves removal to us; do it once the handle
                # has been closed.
                os.unlink(input_file.name)
        finally:
            # Return java configurations to their default values.
            config_java(options=default_options, verbose=False)

        return stdout
Example 50
Project: FancyWord   Author: EastonLee   File: downloader.py    GNU General Public License v3.0 5 votes vote down vote up
def fromxml(xml):
        """
        Build a ``Package`` from an XML element or from an XML source name.

        :param xml: an element exposing ``attrib``, or a string that is
            handed to ``ElementTree.parse``
        :return: a ``Package`` created from the element's attributes, each
            converted with ``compat.text_type``
        """
        if isinstance(xml, compat.string_types):
            # The object returned by parse() wraps the document and carries
            # no ``attrib``; use its root element instead.
            # NOTE(review): assumes ``ElementTree`` is xml.etree.ElementTree.
            xml = ElementTree.parse(xml).getroot()
        for key in xml.attrib:
            xml.attrib[key] = compat.text_type(xml.attrib[key])
        return Package(**xml.attrib)
Example 51
Project: FancyWord   Author: EastonLee   File: downloader.py    GNU General Public License v3.0 5 votes vote down vote up
def fromxml(xml):
        """
        Build a ``Collection`` from an XML element or from an XML source name.

        :param xml: an element exposing ``attrib`` and ``findall``, or a
            string that is handed to ``ElementTree.parse``
        :return: a ``Collection`` whose ``children`` are the ``ref`` values
            of the element's ``item`` children and whose other arguments
            are the element's attributes converted with ``compat.text_type``
        """
        if isinstance(xml, compat.string_types):
            # The object returned by parse() wraps the document and carries
            # no ``attrib``; use its root element instead.
            # NOTE(review): assumes ``ElementTree`` is xml.etree.ElementTree.
            xml = ElementTree.parse(xml).getroot()
        for key in xml.attrib:
            xml.attrib[key] = compat.text_type(xml.attrib[key])
        children = [child.get('ref') for child in xml.findall('item')]
        return Collection(children=children, **xml.attrib)
Example 52
Project: FancyWord   Author: EastonLee   File: framenet.py    GNU General Public License v3.0 5 votes vote down vote up
def __repr__(self):
        """
        Return a list-like representation of this corpus view assembled
        from each element's ``_short_repr()``.  If ``_MAX_REPR_SIZE`` is
        set and the running length exceeds it (with more than two pieces
        gathered), the result is truncated and ends in ``...``.
        """
        parts = []
        size = 5
        cut_short = False

        for elt in self:
            # Elements are rendered through _short_repr(), not repr().
            parts.append(elt._short_repr())
            size += len(parts[-1]) + 2
            if self._MAX_REPR_SIZE and size > self._MAX_REPR_SIZE and len(parts) > 2:
                # Discard the piece that pushed the length over the limit.
                parts.pop()
                cut_short = True
                break

        glue = text_type(',\n ' if self._BREAK_LINES else ', ')
        if cut_short:
            return "[%s, ...]" % glue.join(parts)
        return "[%s]" % glue.join(parts)
Example 53
Project: FancyWord   Author: EastonLee   File: framenet.py    GNU General Public License v3.0 5 votes vote down vote up
def __repr__(self):
        """
        Return a list-like representation of this corpus view assembled
        from each element's ``_short_repr()``; it is truncated and ends in
        ``...`` once the running length exceeds ``_MAX_REPR_SIZE`` with
        more than two pieces gathered.
        """
        parts = []
        size = 5
        for elt in self:
            # Elements are rendered through _short_repr(), not repr().
            parts.append(elt._short_repr())
            size += len(parts[-1]) + 2
            if size > self._MAX_REPR_SIZE and len(parts) > 2:
                # Everything but the piece that crossed the limit is shown.
                return "[%s, ...]" % text_type(', ').join(parts[:-1])
        # The loop finished without truncation: show every piece.
        return "[%s]" % text_type(', ').join(parts)
Example 54
Project: honours_project   Author: JFriel   File: stanford.py    GNU General Public License v3.0 5 votes vote down vote up
def tag_sents(self, sentences):
        """
        Tag the given sentences by running the java tagger command.

        The sentences are written to a temporary file, one per line with
        items joined by single spaces.  The temporary file is deleted and
        the java options are restored even if writing the input, running
        java or decoding its output raises.

        :param sentences: iterable of sentences, each an iterable of str
        :return: the result of ``self.parse_output`` applied to the decoded
            java output and ``sentences``
        """
        encoding = self._encoding
        default_options = ' '.join(_java_options)
        config_java(options=self.java_options, verbose=False)

        try:
            # Create a temporary input file
            _input_fh, self._input_file_path = tempfile.mkstemp(text=True)
            try:
                cmd = list(self._cmd)
                cmd.extend(['-encoding', encoding])

                # Write the actual sentences to the temporary input file
                with os.fdopen(_input_fh, 'wb') as input_file:
                    _input = '\n'.join((' '.join(x) for x in sentences))
                    if isinstance(_input, compat.text_type) and encoding:
                        _input = _input.encode(encoding)
                    input_file.write(_input)

                # Run the tagger and get the output
                stanpos_output, _stderr = java(cmd, classpath=self._stanford_jar,
                                               stdout=PIPE, stderr=PIPE)
                stanpos_output = stanpos_output.decode(encoding)
            finally:
                # Delete the temporary file
                os.unlink(self._input_file_path)
        finally:
            # Return java configurations to their default values
            config_java(options=default_options, verbose=False)

        return self.parse_output(stanpos_output, sentences)
Example 55
Project: honours_project   Author: JFriel   File: tgrep.py    GNU General Public License v3.0 5 votes vote down vote up
def tgrep_positions(pattern, trees, search_leaves=True):
    """
    Yield, for every tree, the list of its positions that match the pattern.

    :param pattern: a tgrep search pattern
    :type pattern: str or output of tgrep_compile()
    :param trees: a sequence of NLTK trees (usually ParentedTrees)
    :type trees: iter(ParentedTree) or iter(Tree)
    :param search_leaves: whether to return matching leaf nodes
    :type search_leaves: bool
    :rtype: iter(tree positions)
    """
    # String patterns (binary or text) must be compiled before use.
    if isinstance(pattern, (binary_type, text_type)):
        pattern = tgrep_compile(pattern)

    for tree in trees:
        try:
            where = (tree.treepositions() if search_leaves
                     else treepositions_no_leaves(tree))
            yield [spot for spot in where if pattern(tree[spot])]
        except AttributeError:
            # An AttributeError raised while inspecting this tree yields
            # an empty list for it.
            yield []
Example 56
Project: honours_project   Author: JFriel   File: tgrep.py    GNU General Public License v3.0 5 votes vote down vote up
def tgrep_nodes(pattern, trees, search_leaves=True):
    """
    Yield, for every tree, the list of its nodes that match the pattern.

    :param pattern: a tgrep search pattern
    :type pattern: str or output of tgrep_compile()
    :param trees: a sequence of NLTK trees (usually ParentedTrees)
    :type trees: iter(ParentedTree) or iter(Tree)
    :param search_leaves: whether to return matching leaf nodes
    :type search_leaves: bool
    :rtype: iter(tree nodes)
    """
    # Accept uncompiled patterns by compiling them on the fly.
    if isinstance(pattern, (binary_type, text_type)):
        pattern = tgrep_compile(pattern)

    for tree in trees:
        try:
            positions = (tree.treepositions() if search_leaves
                         else treepositions_no_leaves(tree))
            yield [tree[p] for p in positions if pattern(tree[p])]
        except AttributeError:
            # Trees that trigger an AttributeError contribute no matches.
            yield []
Example 57
Project: honours_project   Author: JFriel   File: stanford.py    GNU General Public License v3.0 5 votes vote down vote up
def _execute(self, cmd, input_, verbose=False):
        """
        Run a java command on ``input_`` and return its decoded stdout.

        ``input_`` is written to a temporary file whose path is appended to
        ``cmd``.  The temporary file is removed and the java options are
        put back to their previous defaults whether or not the java call
        and the decoding succeed.

        :param cmd: java command line; extended in place with ``-charset``,
            the configured ``-options`` value (if any) and the temporary
            file path
        :type cmd: list(str)
        :param input_: text, or already encoded bytes, to feed to the tool
        :param verbose: forwarded to ``config_java`` when applying
            ``self.java_options``
        :type verbose: bool
        :return: stdout of the java process decoded with ``self._encoding``
        """
        encoding = self._encoding
        cmd.extend(['-charset', encoding])
        _options_cmd = self._options_cmd
        if _options_cmd:
            cmd.extend(['-options', _options_cmd])

        default_options = ' '.join(_java_options)

        # Configure java.
        config_java(options=self.java_options, verbose=verbose)

        try:
            # Windows is incompatible with NamedTemporaryFile() without passing in delete=False.
            input_file = tempfile.NamedTemporaryFile(mode='wb', delete=False)
            try:
                with input_file:
                    # Write the actual sentences to the temporary input file
                    if isinstance(input_, compat.text_type) and encoding:
                        input_ = input_.encode(encoding)
                    input_file.write(input_)
                    input_file.flush()

                    cmd.append(input_file.name)

                    # Run the tagger and get the output.
                    stdout, stderr = java(cmd, classpath=self._stanford_jar,
                                          stdout=PIPE, stderr=PIPE)
                    stdout = stdout.decode(encoding)
            finally:
                # Remove the file only after its handle has been closed.
                os.unlink(input_file.name)
        finally:
            # Return java configurations to their default values.
            config_java(options=default_options, verbose=False)

        return stdout
Example 58
Project: honours_project   Author: JFriel   File: stanford_segmenter.py    GNU General Public License v3.0 5 votes vote down vote up
def segment_sents(self, sentences):
        """
        Segment the given sentences by running the CRFClassifier command.

        The sentences are written to a temporary file, one sentence per
        line with its items joined by single spaces, and that file is
        passed to the command through ``-textFile``.  The temporary file is
        removed even if writing it or running the command fails.

        :param sentences: iterable of sentences, each an iterable of str
        :return: the value returned by ``self._execute`` for the command
        """
        encoding = self._encoding
        # Create a temporary input file
        _input_fh, self._input_file_path = tempfile.mkstemp(text=True)

        try:
            # Write the actual sentences to the temporary input file; the
            # handle is closed on leaving the ``with`` block.
            with os.fdopen(_input_fh, 'wb') as input_file:
                _input = '\n'.join((' '.join(x) for x in sentences))
                if isinstance(_input, compat.text_type) and encoding:
                    _input = _input.encode(encoding)
                input_file.write(_input)

            cmd = [
                'edu.stanford.nlp.ie.crf.CRFClassifier',
                '-sighanCorporaDict', self._sihan_corpora_dict,
                '-textFile', self._input_file_path,
                '-sighanPostProcessing', 'true',
                '-keepAllWhitespaces', 'false',
                '-loadClassifier', self._model,
                '-serDictionary', self._dict
            ]

            stdout = self._execute(cmd)
        finally:
            # Delete the temporary file
            os.unlink(self._input_file_path)

        return stdout
Example 59
Project: honours_project   Author: JFriel   File: downloader.py    GNU General Public License v3.0 5 votes vote down vote up
def fromxml(xml):
        """
        Build a ``Package`` from an XML element or from an XML source name.

        :param xml: an element exposing ``attrib``, or a string that is
            handed to ``ElementTree.parse``
        :return: a ``Package`` created from the element's attributes, each
            converted with ``compat.text_type``
        """
        if isinstance(xml, compat.string_types):
            # parse() yields a document wrapper without ``attrib``; the
            # attributes are found on the root element.
            # NOTE(review): assumes ``ElementTree`` is xml.etree.ElementTree.
            xml = ElementTree.parse(xml).getroot()
        for key in xml.attrib:
            xml.attrib[key] = compat.text_type(xml.attrib[key])
        return Package(**xml.attrib)
Example 60
Project: honours_project   Author: JFriel   File: stanford.py    GNU General Public License v3.0 5 votes vote down vote up
def _execute(self, cmd, input_, verbose=False):
        """
        Run a java command on ``input_`` and return its decoded stdout.

        ``input_`` is written to a temporary file.  When ``self._USE_STDIN``
        is true the file is rewound and passed as the process's stdin;
        otherwise its path is appended to ``cmd``.  The temporary file is
        removed and the java options are restored even when the java call
        or the post-processing of its output raises.

        :param cmd: java command line; extended in place with ``-encoding``,
            ``self.corenlp_options`` (if set) and, when stdin is not used,
            the temporary file path
        :type cmd: list(str)
        :param input_: text, or already encoded bytes, to feed to the tool
        :param verbose: forwarded to ``config_java`` when applying
            ``self.java_options``
        :type verbose: bool
        :return: stdout of the java process with non-breaking-space bytes
            replaced by spaces, decoded with ``self._encoding``
        """
        encoding = self._encoding
        cmd.extend(['-encoding', encoding])
        if self.corenlp_options:
            cmd.append(self.corenlp_options)

        default_options = ' '.join(_java_options)

        # Configure java.
        config_java(options=self.java_options, verbose=verbose)

        try:
            # Windows is incompatible with NamedTemporaryFile() without passing in delete=False.
            input_file = tempfile.NamedTemporaryFile(mode='wb', delete=False)
            try:
                with input_file:
                    # Write the actual sentences to the temporary input file
                    if isinstance(input_, compat.text_type) and encoding:
                        input_ = input_.encode(encoding)
                    input_file.write(input_)
                    input_file.flush()

                    # Run the tagger and get the output.
                    if self._USE_STDIN:
                        input_file.seek(0)
                        stdout, stderr = java(cmd, classpath=self._classpath,
                                              stdin=input_file, stdout=PIPE, stderr=PIPE)
                    else:
                        cmd.append(input_file.name)
                        stdout, stderr = java(cmd, classpath=self._classpath,
                                              stdout=PIPE, stderr=PIPE)

                    # Replace non-breaking spaces (UTF-8 and single-byte
                    # forms) with plain spaces before decoding.
                    # NOTE(review): the bare b'\xa0' replacement also hits
                    # that byte inside other multi-byte UTF-8 sequences --
                    # confirm this is intended.
                    stdout = stdout.replace(b'\xc2\xa0',b' ')
                    stdout = stdout.replace(b'\xa0',b' ')
                    stdout = stdout.decode(encoding)
            finally:
                # Remove the file only after its handle has been closed.
                os.unlink(input_file.name)
        finally:
            # Return java configurations to their default values.
            config_java(options=default_options, verbose=False)

        return stdout
Example 61
Project: honours_project   Author: JFriel   File: framenet.py    GNU General Public License v3.0 5 votes vote down vote up
def __repr__(self):
        """
        Produce a list-style representation of this corpus view from the
        ``_short_repr()`` of its elements.  With ``_MAX_REPR_SIZE`` set,
        the output is truncated and ends in ``...`` once the running
        length exceeds that limit and more than two pieces were gathered.
        """
        collected = []
        total = 5
        clipped = False

        for elt in self:
            # _short_repr() is what distinguishes this from the inherited repr.
            collected.append(elt._short_repr())
            total += len(collected[-1]) + 2
            if self._MAX_REPR_SIZE and total > self._MAX_REPR_SIZE and len(collected) > 2:
                # Leave out the piece that exceeded the limit.
                collected.pop()
                clipped = True
                break

        sep = text_type(',\n ' if self._BREAK_LINES else ', ')
        if clipped:
            return "[%s, ...]" % sep.join(collected)
        return "[%s]" % sep.join(collected)
Example 62
Project: honours_project   Author: JFriel   File: framenet.py    GNU General Public License v3.0 5 votes vote down vote up
def __repr__(self):
        """
        Produce a list-style representation of this corpus view from the
        ``_short_repr()`` of its elements, truncated and ending in ``...``
        once the running length exceeds ``_MAX_REPR_SIZE`` with more than
        two pieces gathered.
        """
        collected = []
        total = 5
        clipped = False
        for elt in self:
            # _short_repr() is what distinguishes this from the inherited repr.
            collected.append(elt._short_repr())
            total += len(collected[-1]) + 2
            if total > self._MAX_REPR_SIZE and len(collected) > 2:
                clipped = True
                break
        if clipped:
            # Leave out the piece that exceeded the limit.
            return "[%s, ...]" % text_type(', ').join(collected[:-1])
        return "[%s]" % text_type(', ').join(collected)
Example 63
Project: honours_project   Author: JFriel   File: stanford.py    GNU General Public License v3.0 5 votes vote down vote up
def tag_sents(self, sentences):
        """
        Tag the given sentences by running the java tagger command.

        The sentences are written to a temporary file, one per line with
        items joined by single spaces.  Cleanup (deleting the temporary
        file, restoring the java options) is performed even if writing the
        input, running java or decoding its output raises.

        :param sentences: iterable of sentences, each an iterable of str
        :return: the result of ``self.parse_output`` applied to the decoded
            java output and ``sentences``
        """
        encoding = self._encoding
        default_options = ' '.join(_java_options)
        config_java(options=self.java_options, verbose=False)

        try:
            # Create a temporary input file
            _input_fh, self._input_file_path = tempfile.mkstemp(text=True)
            try:
                cmd = list(self._cmd)
                cmd.extend(['-encoding', encoding])

                # Write the actual sentences to the temporary input file
                with os.fdopen(_input_fh, 'wb') as input_file:
                    _input = '\n'.join((' '.join(x) for x in sentences))
                    if isinstance(_input, compat.text_type) and encoding:
                        _input = _input.encode(encoding)
                    input_file.write(_input)

                # Run the tagger and get the output
                stanpos_output, _stderr = java(cmd, classpath=self._stanford_jar,
                                               stdout=PIPE, stderr=PIPE)
                stanpos_output = stanpos_output.decode(encoding)
            finally:
                # Delete the temporary file
                os.unlink(self._input_file_path)
        finally:
            # Return java configurations to their default values
            config_java(options=default_options, verbose=False)

        return self.parse_output(stanpos_output, sentences)
Example 64
Project: honours_project   Author: JFriel   File: tgrep.py    GNU General Public License v3.0 5 votes vote down vote up
def _tgrep_node_literal_value(node):
    '''
    Return the string used when a parse tree node is compared by the
    tgrep node literal predicates: a tree's label, or the
    ``text_type`` conversion of any other node.
    '''
    if not _istree(node):
        return text_type(node)
    return node.label()
Example 65
Project: honours_project   Author: JFriel   File: tgrep.py    GNU General Public License v3.0 5 votes vote down vote up
def tgrep_positions(pattern, trees, search_leaves=True):
    """
    Yield, for every tree, the list of tree positions whose node matches
    the given pattern.

    A tree that raises ``AttributeError`` while being searched contributes
    an empty list.

    :param pattern: a tgrep search pattern
    :type pattern: str or output of tgrep_compile()
    :param trees: a sequence of NLTK trees (usually ParentedTrees)
    :type trees: iter(ParentedTree) or iter(Tree)
    :param search_leaves: whether to return matching leaf nodes
    :type search_leaves: bool
    :rtype: iter(tree positions)
    """

    # Strings (byte or text) are compiled; anything else is used as the
    # matching callable directly.
    matcher = pattern
    if isinstance(matcher, (binary_type, text_type)):
        matcher = tgrep_compile(matcher)

    for tree in trees:
        try:
            candidates = (tree.treepositions() if search_leaves
                          else treepositions_no_leaves(tree))
            yield [where for where in candidates if matcher(tree[where])]
        except AttributeError:
            yield []
Example 66
Project: honours_project   Author: JFriel   File: tgrep.py    GNU General Public License v3.0 5 votes vote down vote up
def tgrep_nodes(pattern, trees, search_leaves=True):
    """
    Yield, for every tree, the list of nodes in that tree which match the
    given pattern.

    A tree that raises ``AttributeError`` while being searched contributes
    an empty list.

    :param pattern: a tgrep search pattern
    :type pattern: str or output of tgrep_compile()
    :param trees: a sequence of NLTK trees (usually ParentedTrees)
    :type trees: iter(ParentedTree) or iter(Tree)
    :param search_leaves: whether to return matching leaf nodes
    :type search_leaves: bool
    :rtype: iter(tree nodes)
    """

    # Strings (byte or text) are compiled; anything else is used as the
    # matching callable directly.
    matcher = pattern
    if isinstance(matcher, (binary_type, text_type)):
        matcher = tgrep_compile(matcher)

    for tree in trees:
        try:
            candidates = (tree.treepositions() if search_leaves
                          else treepositions_no_leaves(tree))
            yield [tree[where] for where in candidates if matcher(tree[where])]
        except AttributeError:
            yield []
Example 67
Project: honours_project   Author: JFriel   File: stanford_segmenter.py    GNU General Public License v3.0 5 votes vote down vote up
def segment_sents(self, sentences):
        """
        Run the external CRF classifier over several sentences at once.

        The sentences are written, one per line with tokens joined by single
        spaces, to a temporary file whose path is stored in
        ``self._input_file_path``.  The temporary file is removed even when
        writing the input or running the command fails.

        :param sentences: the sentences to segment, each a sequence of tokens
        :type sentences: iterable of sequences of str
        :return: whatever ``self._execute`` returns for the command
        """
        encoding = self._encoding
        # Create a temporary input file
        _input_fh, self._input_file_path = tempfile.mkstemp(text=True)

        try:
            # Write the actual sentences to the temporary input file; the
            # ``with`` block closes the handle on every path.
            with os.fdopen(_input_fh, 'wb') as input_file:
                _input = '\n'.join(' '.join(x) for x in sentences)
                if isinstance(_input, compat.text_type) and encoding:
                    _input = _input.encode(encoding)
                input_file.write(_input)

            cmd = [
                'edu.stanford.nlp.ie.crf.CRFClassifier',
                '-sighanCorporaDict', self._sihan_corpora_dict,
                '-textFile', self._input_file_path,
                '-sighanPostProcessing', 'true',
                '-keepAllWhitespaces', 'false',
                '-loadClassifier', self._model,
                '-serDictionary', self._dict
            ]

            stdout = self._execute(cmd)
        finally:
            # Delete the temporary file
            os.unlink(self._input_file_path)

        return stdout
Example 68
Project: honours_project   Author: JFriel   File: downloader.py    GNU General Public License v3.0 5 votes vote down vote up
def fromxml(xml):
        """
        Build a ``Package`` from an XML element, or from an XML source that
        is parsed first.

        Every attribute value of the element is converted to text in place
        before the attributes are passed to ``Package`` as keyword arguments.

        :param xml: an element carrying the package attributes, or a string
            that is handed to ``ElementTree.parse``
        :return: ``Package(**xml.attrib)``
        """
        if isinstance(xml, compat.string_types):
            # ``parse`` returns a tree wrapper, which has no ``attrib``; the
            # attributes live on its root element.
            # NOTE(review): assumes ``ElementTree`` is the
            # ``xml.etree.ElementTree`` module (or API-compatible) -- confirm.
            xml = ElementTree.parse(xml).getroot()
        for key in xml.attrib:
            xml.attrib[key] = compat.text_type(xml.attrib[key])
        return Package(**xml.attrib)
Example 69
Project: razzy-spinner   Author: rafasashi   File: probability.py    GNU General Public License v3.0 4 votes vote down vote up
def plot(self, *args, **kwargs):
        """
        Plot samples from the frequency distribution
        displaying the most frequent sample first.  If an integer
        parameter is supplied, stop after this many samples have been
        plotted; otherwise ``len(self)`` samples are plotted.
        For a cumulative plot, specify cumulative=True.
        (Requires Matplotlib to be installed.)

        The keyword arguments left over after ``title`` has been consumed
        are handed to ``pylab.plot``; ``linewidth`` defaults to 2.

        :param title: The title for the graph
        :type title: str
        :param cumulative: A flag to specify whether the plot is cumulative (default = False)
        :type cumulative: bool
        :raise ValueError: if matplotlib cannot be imported
        """
        try:
            from matplotlib import pylab
        except ImportError:
            # The two literals are concatenated, so the trailing space keeps
            # the sentences apart in the final message.
            raise ValueError('The plot function requires matplotlib to be installed. '
                             'See http://matplotlib.org/')

        if not args:
            args = [len(self)]
        samples = [item for item, _ in self.most_common(*args)]

        cumulative = _get_kwarg(kwargs, 'cumulative', False)
        if cumulative:
            freqs = list(self._cumulative_frequencies(samples))
            ylabel = "Cumulative Counts"
        else:
            freqs = [self[sample] for sample in samples]
            ylabel = "Counts"

        pylab.grid(True, color="silver")
        if "linewidth" not in kwargs:
            kwargs["linewidth"] = 2
        if "title" in kwargs:
            # ``title`` is for the figure, not for ``pylab.plot``.
            pylab.title(kwargs.pop("title"))
        pylab.plot(freqs, **kwargs)
        pylab.xticks(range(len(samples)), [compat.text_type(s) for s in samples], rotation=90)
        pylab.xlabel("Samples")
        pylab.ylabel(ylabel)
        pylab.show()
Example 70
Project: razzy-spinner   Author: rafasashi   File: probability.py    GNU General Public License v3.0 4 votes vote down vote up
def plot(self, *args, **kwargs):
        """
        Plot the given samples from the conditional frequency distribution,
        drawing one line per condition.
        For a cumulative plot, specify cumulative=True.
        (Requires Matplotlib to be installed.)

        Positional arguments and the remaining keyword arguments are handed
        to ``pylab.plot``; ``linewidth`` defaults to 2.

        :param samples: The samples to plot (default is the sorted set of
            samples found under the plotted conditions)
        :type samples: list
        :param title: The title for the graph
        :type title: str
        :param conditions: The conditions to plot (default is all)
        :type conditions: list
        :param cumulative: A flag to specify whether the plot is cumulative (default = False)
        :type cumulative: bool
        :raise ValueError: if matplotlib cannot be imported
        """
        try:
            from matplotlib import pylab
        except ImportError:
            # The two literals are concatenated, so the trailing space keeps
            # the sentences apart in the final message.
            raise ValueError('The plot function requires matplotlib to be installed. '
                             'See http://matplotlib.org/')

        cumulative = _get_kwarg(kwargs, 'cumulative', False)
        conditions = _get_kwarg(kwargs, 'conditions', sorted(self.conditions()))
        title = _get_kwarg(kwargs, 'title', '')
        samples = _get_kwarg(kwargs, 'samples',
                             sorted(set(v for c in conditions for v in self[c])))  # this computation could be wasted
        if "linewidth" not in kwargs:
            kwargs["linewidth"] = 2

        # Decided once, outside the loop, so that both names exist even when
        # there are no conditions to plot.
        if cumulative:
            ylabel = "Cumulative Counts"
            legend_loc = 'lower right'
        else:
            ylabel = "Counts"
            legend_loc = 'upper right'

        for condition in conditions:
            if cumulative:
                freqs = list(self[condition]._cumulative_frequencies(samples))
            else:
                freqs = [self[condition][sample] for sample in samples]
            kwargs['label'] = "%s" % condition
            pylab.plot(freqs, *args, **kwargs)

        pylab.legend(loc=legend_loc)
        pylab.grid(True, color="silver")
        pylab.xticks(range(len(samples)), [compat.text_type(s) for s in samples], rotation=90)
        if title:
            pylab.title(title)
        pylab.xlabel("Samples")
        pylab.ylabel(ylabel)
        pylab.show()
Example 71
Project: OpenBottle   Author: xiaozhuchacha   File: probability.py    MIT License 4 votes vote down vote up
def plot(self, *args, **kwargs):
        """
        Plot samples from the frequency distribution
        displaying the most frequent sample first.  If an integer
        parameter is supplied, stop after this many samples have been
        plotted; otherwise ``len(self)`` samples are plotted.
        For a cumulative plot, specify cumulative=True.
        (Requires Matplotlib to be installed.)

        The keyword arguments left over after ``title`` has been consumed
        are handed to ``pylab.plot``; ``linewidth`` defaults to 2.

        :param title: The title for the graph
        :type title: str
        :param cumulative: A flag to specify whether the plot is cumulative (default = False)
        :type cumulative: bool
        :raise ValueError: if matplotlib cannot be imported
        """
        try:
            from matplotlib import pylab
        except ImportError:
            # The two literals are concatenated, so the trailing space keeps
            # the sentences apart in the final message.
            raise ValueError('The plot function requires matplotlib to be installed. '
                             'See http://matplotlib.org/')

        if not args:
            args = [len(self)]
        samples = [item for item, _ in self.most_common(*args)]

        cumulative = _get_kwarg(kwargs, 'cumulative', False)
        if cumulative:
            freqs = list(self._cumulative_frequencies(samples))
            ylabel = "Cumulative Counts"
        else:
            freqs = [self[sample] for sample in samples]
            ylabel = "Counts"

        pylab.grid(True, color="silver")
        if "linewidth" not in kwargs:
            kwargs["linewidth"] = 2
        if "title" in kwargs:
            # ``title`` is for the figure, not for ``pylab.plot``.
            pylab.title(kwargs.pop("title"))
        pylab.plot(freqs, **kwargs)
        pylab.xticks(range(len(samples)), [compat.text_type(s) for s in samples], rotation=90)
        pylab.xlabel("Samples")
        pylab.ylabel(ylabel)
        pylab.show()
Example 72
Project: OpenBottle   Author: xiaozhuchacha   File: probability.py    MIT License 4 votes vote down vote up
def plot(self, *args, **kwargs):
        """
        Plot the given samples from the conditional frequency distribution,
        drawing one line per condition.
        For a cumulative plot, specify cumulative=True.
        (Requires Matplotlib to be installed.)

        Positional arguments and the remaining keyword arguments are handed
        to ``pylab.plot``; ``linewidth`` defaults to 2.

        :param samples: The samples to plot (default is the sorted set of
            samples found under the plotted conditions)
        :type samples: list
        :param title: The title for the graph
        :type title: str
        :param conditions: The conditions to plot (default is all)
        :type conditions: list
        :param cumulative: A flag to specify whether the plot is cumulative (default = False)
        :type cumulative: bool
        :raise ValueError: if matplotlib cannot be imported
        """
        try:
            from matplotlib import pylab
        except ImportError:
            # The two literals are concatenated, so the trailing space keeps
            # the sentences apart in the final message.
            raise ValueError('The plot function requires matplotlib to be installed. '
                             'See http://matplotlib.org/')

        cumulative = _get_kwarg(kwargs, 'cumulative', False)
        conditions = _get_kwarg(kwargs, 'conditions', sorted(self.conditions()))
        title = _get_kwarg(kwargs, 'title', '')
        samples = _get_kwarg(kwargs, 'samples',
                             sorted(set(v for c in conditions for v in self[c])))  # this computation could be wasted
        if "linewidth" not in kwargs:
            kwargs["linewidth"] = 2

        # Decided once, outside the loop, so that both names exist even when
        # there are no conditions to plot.
        if cumulative:
            ylabel = "Cumulative Counts"
            legend_loc = 'lower right'
        else:
            ylabel = "Counts"
            legend_loc = 'upper right'

        for condition in conditions:
            if cumulative:
                freqs = list(self[condition]._cumulative_frequencies(samples))
            else:
                freqs = [self[condition][sample] for sample in samples]
            kwargs['label'] = "%s" % condition
            pylab.plot(freqs, *args, **kwargs)

        pylab.legend(loc=legend_loc)
        pylab.grid(True, color="silver")
        pylab.xticks(range(len(samples)), [compat.text_type(s) for s in samples], rotation=90)
        if title:
            pylab.title(title)
        pylab.xlabel("Samples")
        pylab.ylabel(ylabel)
        pylab.show()
Example 73
Project: OpenBottle   Author: xiaozhuchacha   File: probability.py    MIT License 4 votes vote down vote up
def plot(self, *args, **kwargs):
        """
        Plot samples from the frequency distribution
        displaying the most frequent sample first.  If an integer
        parameter is supplied, stop after this many samples have been
        plotted; otherwise ``len(self)`` samples are plotted.
        For a cumulative plot, specify cumulative=True.
        (Requires Matplotlib to be installed.)

        The keyword arguments left over after ``title`` has been consumed
        are handed to ``pylab.plot``; ``linewidth`` defaults to 2.

        :param title: The title for the graph
        :type title: str
        :param cumulative: A flag to specify whether the plot is cumulative (default = False)
        :type cumulative: bool
        :raise ValueError: if matplotlib cannot be imported
        """
        try:
            from matplotlib import pylab
        except ImportError:
            # The two literals are concatenated, so the trailing space keeps
            # the sentences apart in the final message.
            raise ValueError('The plot function requires matplotlib to be installed. '
                             'See http://matplotlib.org/')

        if not args:
            args = [len(self)]
        samples = [item for item, _ in self.most_common(*args)]

        cumulative = _get_kwarg(kwargs, 'cumulative', False)
        if cumulative:
            freqs = list(self._cumulative_frequencies(samples))
            ylabel = "Cumulative Counts"
        else:
            freqs = [self[sample] for sample in samples]
            ylabel = "Counts"

        pylab.grid(True, color="silver")
        if "linewidth" not in kwargs:
            kwargs["linewidth"] = 2
        if "title" in kwargs:
            # ``title`` is for the figure, not for ``pylab.plot``.
            pylab.title(kwargs.pop("title"))
        pylab.plot(freqs, **kwargs)
        pylab.xticks(range(len(samples)), [compat.text_type(s) for s in samples], rotation=90)
        pylab.xlabel("Samples")
        pylab.ylabel(ylabel)
        pylab.show()
Example 74
Project: OpenBottle   Author: xiaozhuchacha   File: probability.py    MIT License 4 votes vote down vote up
def plot(self, *args, **kwargs):
        """
        Plot the given samples from the conditional frequency distribution,
        drawing one line per condition.
        For a cumulative plot, specify cumulative=True.
        (Requires Matplotlib to be installed.)

        Positional arguments and the remaining keyword arguments are handed
        to ``pylab.plot``; ``linewidth`` defaults to 2.

        :param samples: The samples to plot (default is the sorted set of
            samples found under the plotted conditions)
        :type samples: list
        :param title: The title for the graph
        :type title: str
        :param conditions: The conditions to plot (default is all)
        :type conditions: list
        :param cumulative: A flag to specify whether the plot is cumulative (default = False)
        :type cumulative: bool
        :raise ValueError: if matplotlib cannot be imported
        """
        try:
            from matplotlib import pylab
        except ImportError:
            # The two literals are concatenated, so the trailing space keeps
            # the sentences apart in the final message.
            raise ValueError('The plot function requires matplotlib to be installed. '
                             'See http://matplotlib.org/')

        cumulative = _get_kwarg(kwargs, 'cumulative', False)
        conditions = _get_kwarg(kwargs, 'conditions', sorted(self.conditions()))
        title = _get_kwarg(kwargs, 'title', '')
        samples = _get_kwarg(kwargs, 'samples',
                             sorted(set(v for c in conditions for v in self[c])))  # this computation could be wasted
        if "linewidth" not in kwargs:
            kwargs["linewidth"] = 2

        # Decided once, outside the loop, so that both names exist even when
        # there are no conditions to plot.
        if cumulative:
            ylabel = "Cumulative Counts"
            legend_loc = 'lower right'
        else:
            ylabel = "Counts"
            legend_loc = 'upper right'

        for condition in conditions:
            if cumulative:
                freqs = list(self[condition]._cumulative_frequencies(samples))
            else:
                freqs = [self[condition][sample] for sample in samples]
            kwargs['label'] = "%s" % condition
            pylab.plot(freqs, *args, **kwargs)

        pylab.legend(loc=legend_loc)
        pylab.grid(True, color="silver")
        pylab.xticks(range(len(samples)), [compat.text_type(s) for s in samples], rotation=90)
        if title:
            pylab.title(title)
        pylab.xlabel("Samples")
        pylab.ylabel(ylabel)
        pylab.show()
Example 75
Project: FancyWord   Author: EastonLee   File: probability.py    GNU General Public License v3.0 4 votes vote down vote up
def plot(self, *args, **kwargs):
        """
        Plot samples from the frequency distribution
        displaying the most frequent sample first.  If an integer
        parameter is supplied, stop after this many samples have been
        plotted; otherwise ``len(self)`` samples are plotted.
        For a cumulative plot, specify cumulative=True.
        (Requires Matplotlib to be installed.)

        The keyword arguments left over after ``title`` has been consumed
        are handed to ``pylab.plot``; ``linewidth`` defaults to 2.

        :param title: The title for the graph
        :type title: str
        :param cumulative: A flag to specify whether the plot is cumulative (default = False)
        :type cumulative: bool
        :raise ValueError: if matplotlib cannot be imported
        """
        try:
            from matplotlib import pylab
        except ImportError:
            # The two literals are concatenated, so the trailing space keeps
            # the sentences apart in the final message.
            raise ValueError('The plot function requires matplotlib to be installed. '
                             'See http://matplotlib.org/')

        if not args:
            args = [len(self)]
        samples = [item for item, _ in self.most_common(*args)]

        cumulative = _get_kwarg(kwargs, 'cumulative', False)
        if cumulative:
            freqs = list(self._cumulative_frequencies(samples))
            ylabel = "Cumulative Counts"
        else:
            freqs = [self[sample] for sample in samples]
            ylabel = "Counts"

        pylab.grid(True, color="silver")
        if "linewidth" not in kwargs:
            kwargs["linewidth"] = 2
        if "title" in kwargs:
            # ``title`` is for the figure, not for ``pylab.plot``.
            pylab.title(kwargs.pop("title"))
        pylab.plot(freqs, **kwargs)
        pylab.xticks(range(len(samples)), [compat.text_type(s) for s in samples], rotation=90)
        pylab.xlabel("Samples")
        pylab.ylabel(ylabel)
        pylab.show()
Example 76
Project: FancyWord   Author: EastonLee   File: probability.py    GNU General Public License v3.0 4 votes vote down vote up
def plot(self, *args, **kwargs):
        """
        Plot the given samples from the conditional frequency distribution,
        drawing one line per condition.
        For a cumulative plot, specify cumulative=True.
        (Requires Matplotlib to be installed.)

        Positional arguments and the remaining keyword arguments are handed
        to ``pylab.plot``; ``linewidth`` defaults to 2.

        :param samples: The samples to plot (default is the sorted set of
            samples found under the plotted conditions)
        :type samples: list
        :param title: The title for the graph
        :type title: str
        :param conditions: The conditions to plot (default is all)
        :type conditions: list
        :param cumulative: A flag to specify whether the plot is cumulative (default = False)
        :type cumulative: bool
        :raise ValueError: if matplotlib cannot be imported
        """
        try:
            from matplotlib import pylab
        except ImportError:
            # The two literals are concatenated, so the trailing space keeps
            # the sentences apart in the final message.
            raise ValueError('The plot function requires matplotlib to be installed. '
                             'See http://matplotlib.org/')

        cumulative = _get_kwarg(kwargs, 'cumulative', False)
        conditions = _get_kwarg(kwargs, 'conditions', sorted(self.conditions()))
        title = _get_kwarg(kwargs, 'title', '')
        samples = _get_kwarg(kwargs, 'samples',
                             sorted(set(v for c in conditions for v in self[c])))  # this computation could be wasted
        if "linewidth" not in kwargs:
            kwargs["linewidth"] = 2

        # Decided once, outside the loop, so that both names exist even when
        # there are no conditions to plot.
        if cumulative:
            ylabel = "Cumulative Counts"
            legend_loc = 'lower right'
        else:
            ylabel = "Counts"
            legend_loc = 'upper right'

        for condition in conditions:
            if cumulative:
                freqs = list(self[condition]._cumulative_frequencies(samples))
            else:
                freqs = [self[condition][sample] for sample in samples]
            kwargs['label'] = "%s" % condition
            pylab.plot(freqs, *args, **kwargs)

        pylab.legend(loc=legend_loc)
        pylab.grid(True, color="silver")
        pylab.xticks(range(len(samples)), [compat.text_type(s) for s in samples], rotation=90)
        if title:
            pylab.title(title)
        pylab.xlabel("Samples")
        pylab.ylabel(ylabel)
        pylab.show()
Example 77
Project: honours_project   Author: JFriel   File: probability.py    GNU General Public License v3.0 4 votes vote down vote up
def plot(self, *args, **kwargs):
        """
        Plot samples from the frequency distribution
        displaying the most frequent sample first.  If an integer
        parameter is supplied, stop after this many samples have been
        plotted; otherwise ``len(self)`` samples are plotted.
        For a cumulative plot, specify cumulative=True.
        (Requires Matplotlib to be installed.)

        The keyword arguments left over after ``title`` has been consumed
        are handed to ``pylab.plot``; ``linewidth`` defaults to 2.

        :param title: The title for the graph
        :type title: str
        :param cumulative: A flag to specify whether the plot is cumulative (default = False)
        :type cumulative: bool
        :raise ValueError: if matplotlib cannot be imported
        """
        try:
            from matplotlib import pylab
        except ImportError:
            # The two literals are concatenated, so the trailing space keeps
            # the sentences apart in the final message.
            raise ValueError('The plot function requires matplotlib to be installed. '
                             'See http://matplotlib.org/')

        if not args:
            args = [len(self)]
        samples = [item for item, _ in self.most_common(*args)]

        cumulative = _get_kwarg(kwargs, 'cumulative', False)
        if cumulative:
            freqs = list(self._cumulative_frequencies(samples))
            ylabel = "Cumulative Counts"
        else:
            freqs = [self[sample] for sample in samples]
            ylabel = "Counts"

        pylab.grid(True, color="silver")
        if "linewidth" not in kwargs:
            kwargs["linewidth"] = 2
        if "title" in kwargs:
            # ``title`` is for the figure, not for ``pylab.plot``.
            pylab.title(kwargs.pop("title"))
        pylab.plot(freqs, **kwargs)
        pylab.xticks(range(len(samples)), [compat.text_type(s) for s in samples], rotation=90)
        pylab.xlabel("Samples")
        pylab.ylabel(ylabel)
        pylab.show()
Example 78
Project: honours_project   Author: JFriel   File: probability.py    GNU General Public License v3.0 4 votes vote down vote up
def plot(self, *args, **kwargs):
        """
        Plot the given samples from the conditional frequency distribution,
        drawing one line per condition.
        For a cumulative plot, specify cumulative=True.
        (Requires Matplotlib to be installed.)

        Positional arguments and the remaining keyword arguments are handed
        to ``pylab.plot``; ``linewidth`` defaults to 2.

        :param samples: The samples to plot (default is the sorted set of
            samples found under the plotted conditions)
        :type samples: list
        :param title: The title for the graph
        :type title: str
        :param conditions: The conditions to plot (default is all)
        :type conditions: list
        :param cumulative: A flag to specify whether the plot is cumulative (default = False)
        :type cumulative: bool
        :raise ValueError: if matplotlib cannot be imported
        """
        try:
            from matplotlib import pylab
        except ImportError:
            # The two literals are concatenated, so the trailing space keeps
            # the sentences apart in the final message.
            raise ValueError('The plot function requires matplotlib to be installed. '
                             'See http://matplotlib.org/')

        cumulative = _get_kwarg(kwargs, 'cumulative', False)
        conditions = _get_kwarg(kwargs, 'conditions', sorted(self.conditions()))
        title = _get_kwarg(kwargs, 'title', '')
        samples = _get_kwarg(kwargs, 'samples',
                             sorted(set(v for c in conditions for v in self[c])))  # this computation could be wasted
        if "linewidth" not in kwargs:
            kwargs["linewidth"] = 2

        # Decided once, outside the loop, so that both names exist even when
        # there are no conditions to plot.
        if cumulative:
            ylabel = "Cumulative Counts"
            legend_loc = 'lower right'
        else:
            ylabel = "Counts"
            legend_loc = 'upper right'

        for condition in conditions:
            if cumulative:
                freqs = list(self[condition]._cumulative_frequencies(samples))
            else:
                freqs = [self[condition][sample] for sample in samples]
            kwargs['label'] = "%s" % condition
            pylab.plot(freqs, *args, **kwargs)

        pylab.legend(loc=legend_loc)
        pylab.grid(True, color="silver")
        pylab.xticks(range(len(samples)), [compat.text_type(s) for s in samples], rotation=90)
        if title:
            pylab.title(title)
        pylab.xlabel("Samples")
        pylab.ylabel(ylabel)
        pylab.show()
Example 79
Project: honours_project   Author: JFriel   File: probability.py    GNU General Public License v3.0 4 votes vote down vote up
def plot(self, *args, **kwargs):
        """
        Plot samples from the frequency distribution
        displaying the most frequent sample first.  If an integer
        parameter is supplied, stop after this many samples have been
        plotted; otherwise ``len(self)`` samples are plotted.
        For a cumulative plot, specify cumulative=True.
        (Requires Matplotlib to be installed.)

        The keyword arguments left over after ``title`` has been consumed
        are handed to ``pylab.plot``; ``linewidth`` defaults to 2.

        :param title: The title for the graph
        :type title: str
        :param cumulative: A flag to specify whether the plot is cumulative (default = False)
        :type cumulative: bool
        :raise ValueError: if matplotlib cannot be imported
        """
        try:
            from matplotlib import pylab
        except ImportError:
            # The two literals are concatenated, so the trailing space keeps
            # the sentences apart in the final message.
            raise ValueError('The plot function requires matplotlib to be installed. '
                             'See http://matplotlib.org/')

        if not args:
            args = [len(self)]
        samples = [item for item, _ in self.most_common(*args)]

        cumulative = _get_kwarg(kwargs, 'cumulative', False)
        if cumulative:
            freqs = list(self._cumulative_frequencies(samples))
            ylabel = "Cumulative Counts"
        else:
            freqs = [self[sample] for sample in samples]
            ylabel = "Counts"

        pylab.grid(True, color="silver")
        if "linewidth" not in kwargs:
            kwargs["linewidth"] = 2
        if "title" in kwargs:
            # ``title`` is for the figure, not for ``pylab.plot``.
            pylab.title(kwargs.pop("title"))
        pylab.plot(freqs, **kwargs)
        pylab.xticks(range(len(samples)), [compat.text_type(s) for s in samples], rotation=90)
        pylab.xlabel("Samples")
        pylab.ylabel(ylabel)
        pylab.show()
Example 80
Project: honours_project   Author: JFriel   File: probability.py    GNU General Public License v3.0 4 votes vote down vote up
def plot(self, *args, **kwargs):
        """
        Plot the given samples from the conditional frequency distribution,
        drawing one line per condition.
        For a cumulative plot, specify cumulative=True.
        (Requires Matplotlib to be installed.)

        Positional arguments and the remaining keyword arguments are handed
        to ``pylab.plot``; ``linewidth`` defaults to 2.

        :param samples: The samples to plot (default is the sorted set of
            samples found under the plotted conditions)
        :type samples: list
        :param title: The title for the graph
        :type title: str
        :param conditions: The conditions to plot (default is all)
        :type conditions: list
        :param cumulative: A flag to specify whether the plot is cumulative (default = False)
        :type cumulative: bool
        :raise ValueError: if matplotlib cannot be imported
        """
        try:
            from matplotlib import pylab
        except ImportError:
            # The two literals are concatenated, so the trailing space keeps
            # the sentences apart in the final message.
            raise ValueError('The plot function requires matplotlib to be installed. '
                             'See http://matplotlib.org/')

        cumulative = _get_kwarg(kwargs, 'cumulative', False)
        conditions = _get_kwarg(kwargs, 'conditions', sorted(self.conditions()))
        title = _get_kwarg(kwargs, 'title', '')
        samples = _get_kwarg(kwargs, 'samples',
                             sorted(set(v for c in conditions for v in self[c])))  # this computation could be wasted
        if "linewidth" not in kwargs:
            kwargs["linewidth"] = 2

        # Decided once, outside the loop, so that both names exist even when
        # there are no conditions to plot.
        if cumulative:
            ylabel = "Cumulative Counts"
            legend_loc = 'lower right'
        else:
            ylabel = "Counts"
            legend_loc = 'upper right'

        for condition in conditions:
            if cumulative:
                freqs = list(self[condition]._cumulative_frequencies(samples))
            else:
                freqs = [self[condition][sample] for sample in samples]
            kwargs['label'] = "%s" % condition
            pylab.plot(freqs, *args, **kwargs)

        pylab.legend(loc=legend_loc)
        pylab.grid(True, color="silver")
        pylab.xticks(range(len(samples)), [compat.text_type(s) for s in samples], rotation=90)
        if title:
            pylab.title(title)
        pylab.xlabel("Samples")
        pylab.ylabel(ylabel)
        pylab.show()