Python nltk.compat.string_types() Examples

The following code examples show how to use nltk.compat.string_types. They are taken from open source Python projects. You can vote up the examples you like or vote down the ones you don't like.

Example 1
Project: razzy-spinner   Author: rafasashi   File: tree.py    GNU General Public License v3.0 6 votes vote down vote up
def productions(self):
        """
        Return the grammar productions licensed by this tree.

        Every subtree of the form (P: C1 C2 ... Cn) contributes one production
        of the form P -> C1 C2 ... Cn.  The production for this node comes
        first, followed by the productions of each ``Tree`` child in order.

            >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
            >>> t.productions()
            [S -> NP VP, NP -> D N, D -> 'the', N -> 'dog', VP -> V NP, V -> 'chased',
            NP -> D N, D -> 'the', N -> 'cat']

        :raise TypeError: if the label of this node is not a string.
        :rtype: list(Production)
        """
        label = self._label
        if not isinstance(label, string_types):
            raise TypeError('Productions can only be generated from trees having node labels that are strings')

        result = [Production(Nonterminal(label), _child_names(self))]
        # Leaves contribute nothing; only Tree children are recursed into.
        subtrees = (kid for kid in self if isinstance(kid, Tree))
        for subtree in subtrees:
            result.extend(subtree.productions())
        return result 
Example 2
Project: razzy-spinner   Author: rafasashi   File: tree.py    GNU General Public License v3.0 6 votes vote down vote up
def _pformat_flat(self, nodesep, parens, quotes):
        """
        Format this tree on a single line.

        :param nodesep: text written between the node label and the children.
        :param parens: a two-item sequence; item 0 opens the bracketing and
            item 1 closes it.
        :param quotes: when false, string leaves are written verbatim;
            otherwise they are passed through ``unicode_repr``.
        :return: the one-line rendering of this tree.
        """
        def render(kid):
            # Subtrees recurse, tuple leaves are joined with '/', and
            # everything else falls back to unicode_repr.
            if isinstance(kid, Tree):
                return kid._pformat_flat(nodesep, parens, quotes)
            if isinstance(kid, tuple):
                return "/".join(kid)
            if isinstance(kid, string_types) and not quotes:
                return '%s' % kid
            return unicode_repr(kid)

        body = " ".join([render(kid) for kid in self])
        label = self._label
        if not isinstance(label, string_types):
            label = unicode_repr(label)
        return '%s%s%s %s%s' % (parens[0], label, nodesep, body, parens[1]) 
Example 3
Project: razzy-spinner   Author: rafasashi   File: regexp.py    GNU General Public License v3.0 6 votes vote down vote up
def __init__(self, regexp, repl, descr):
        """
        Construct a new RegexpChunkRule.

        :type regexp: regexp or str
        :param regexp: The regular expression for this ``RegexpChunkRule``.
            A string is compiled with ``re.compile``; any other value is
            stored unchanged.  When this rule is applied to a
            ``ChunkString``, any substring that matches ``regexp`` will be
            replaced using the replacement string ``repl``.  Note that this
            must be a normal regular expression, not a tag pattern.
        :type repl: str
        :param repl: The replacement expression for this ``RegexpChunkRule``.
        :type descr: str
        :param descr: A short description of the purpose and/or effect
            of this rule.
        """
        compiled = re.compile(regexp) if isinstance(regexp, string_types) else regexp
        self._repl, self._descr = repl, descr
        self._regexp = compiled 
Example 4
Project: razzy-spinner   Author: rafasashi   File: ipipan.py    GNU General Public License v3.0 6 votes vote down vote up
def fileids(self, channels=None, domains=None, categories=None):
        """
        Return the file identifiers of this corpus, optionally restricted by
        exactly one of ``channels``, ``domains`` or ``categories``.

        :param channels: a single channel (string) or a collection of them.
        :param domains: a single domain (string) or a collection of them.
        :param categories: a single category (string) or a collection of them.
        :raise ValueError: if more than one of the three filters is given.
        :return: ``CorpusReader.fileids(self)`` when no filter is given,
            otherwise the result of ``self._list_morph_files_by`` for the
            selected filter.
        """
        # The filters are mutually exclusive: reject any call that supplies
        # two or three of them (previously only "all three" was rejected and
        # the extra filter was silently ignored).
        given = sum(1 for selector in (channels, domains, categories)
                    if selector is not None)
        if given > 1:
            raise ValueError('You can specify only one of channels, domains '
                             'and categories parameter at once')
        if given == 0:
            return CorpusReader.fileids(self)

        # A bare string is shorthand for a one-element list.
        if isinstance(channels, compat.string_types):
            channels = [channels]
        if isinstance(domains, compat.string_types):
            domains = [domains]
        if isinstance(categories, compat.string_types):
            categories = [categories]

        # Dispatch on which filter was supplied, not on its truthiness, so
        # that an empty collection is still routed to its own lookup.
        if channels is not None:
            return self._list_morph_files_by('channel', channels)
        elif domains is not None:
            return self._list_morph_files_by('domain', domains)
        else:
            return self._list_morph_files_by('keyTerm', categories,
                    map=self._map_category) 
Example 5
Project: razzy-spinner   Author: rafasashi   File: ycoe.py    GNU General Public License v3.0 6 votes vote down vote up
def _getfileids(self, documents, subcorpus):
        """
        Map document identifiers to the file identifiers of one subcorpus.

        :param documents: a document identifier, a collection of document
            identifiers, or None to select everything in ``self._documents``.
        :param subcorpus: the extension appended to every document
            identifier (pos or psd).
        :raise ValueError: if a requested document is not listed in
            ``self._documents``.
        :return: a list of ``'<document>.<subcorpus>'`` strings.
        """
        if documents is None:
            documents = self._documents
        else:
            if isinstance(documents, compat.string_types):
                documents = [documents]
            known = self._documents
            for name in documents:
                if name in known:
                    continue
                # An unknown name ending in a subcorpus extension is most
                # likely a file identifier passed by mistake.
                if name[-4:] not in ('.pos', '.psd'):
                    raise ValueError('Document identifier %s not found'
                                     % name)
                raise ValueError(
                    'Expected a document identifier, not a file '
                    'identifier.  (Use corpus.documents() to get '
                    'a list of document identifiers.')
        return ['%s.%s' % (name, subcorpus) for name in documents]

    # Delegate to one of our two sub-readers: 
Example 6
Project: razzy-spinner   Author: rafasashi   File: conll.py    GNU General Public License v3.0 6 votes vote down vote up
def __init__(self, root, fileids, columntypes,
                 chunk_types=None, root_label='S', pos_in_tree=False,
                 srl_includes_roleset=True, encoding='utf8',
                 tree_class=Tree, tagset=None):
        """
        Validate the column layout, record the reader options and initialise
        the underlying ``CorpusReader``.

        :param columntypes: the column type names, in column order; each must
            appear in ``self.COLUMN_TYPES``.
        :param chunk_types: a single chunk type (string) or a collection of
            them; a string is wrapped in a one-element list.
        :raise ValueError: if ``columntypes`` contains an unknown name.
        """
        valid_types = self.COLUMN_TYPES
        for name in columntypes:
            if name not in valid_types:
                raise ValueError('Bad column type %r' % name)
        if isinstance(chunk_types, compat.string_types):
            chunk_types = [chunk_types]
        self._chunk_types = chunk_types
        # Map every column type name to its column index.
        self._colmap = {name: index for index, name in enumerate(columntypes)}
        self._pos_in_tree = pos_in_tree
        self._root_label = root_label # for chunks
        self._srl_includes_roleset = srl_includes_roleset
        self._tree_class = tree_class
        CorpusReader.__init__(self, root, fileids, encoding)
        self._tagset = tagset

    #/////////////////////////////////////////////////////////////////
    # Data Access Methods
    #///////////////////////////////////////////////////////////////// 
Example 7
Project: razzy-spinner   Author: rafasashi   File: verbnet.py    GNU General Public License v3.0 6 votes vote down vote up
def fileids(self, vnclass_ids=None):
        """
        Return the fileids that make up this corpus, or only those of the
        given verbnet class(es).

        :param vnclass_ids: None for every fileid, a single class identifier
            (string), or an iterable of class identifiers.
        :return: ``self._fileids`` when ``vnclass_ids`` is None, otherwise a
            list with one fileid per requested class.
        """
        if vnclass_ids is None:
            return self._fileids
        # Treat a lone identifier as a one-element collection.
        if isinstance(vnclass_ids, compat.string_types):
            vnclass_ids = [vnclass_ids]
        return [self._class_to_fileid[self.longid(class_id)]
                for class_id in vnclass_ids]


    ######################################################################
    #{ Index Initialization
    ###################################################################### 
Example 8
Project: razzy-spinner   Author: rafasashi   File: verbnet.py    GNU General Public License v3.0 6 votes vote down vote up
def pprint(self, vnclass):
        """
        Return a string containing a pretty-printed representation of
        the given verbnet class.

        The output is the class ID followed by its subclasses, members,
        thematic roles and frames, in that order.

        :param vnclass: A verbnet class identifier; or an ElementTree
            containing the xml contents of a verbnet class.
        """
        if isinstance(vnclass, compat.string_types):
            vnclass = self.vnclass(vnclass)

        header = vnclass.get('ID') + '\n'
        sections = [
            header,
            self.pprint_subclasses(vnclass, indent='  ') + '\n',
            self.pprint_members(vnclass, indent='  ') + '\n',
            '  Thematic roles:\n',
            self.pprint_themroles(vnclass, indent='    ') + '\n',
            '  Frames:\n',
            '\n'.join(self.pprint_frame(frame, indent='    ')
                      for frame in vnclass.findall('FRAMES/FRAME')),
        ]
        return ''.join(sections) 
Example 9
Project: razzy-spinner   Author: rafasashi   File: verbnet.py    GNU General Public License v3.0 6 votes vote down vote up
def pprint_subclasses(self, vnclass, indent=''):
        """
        Return a string containing a pretty-printed representation of
        the given verbnet class's subclasses.

        :param vnclass: A verbnet class identifier; or an ElementTree
            containing the xml contents of a verbnet class.
        :param indent: prefix for the first line; continuation lines get
            two extra spaces.
        """
        if isinstance(vnclass, compat.string_types):
            vnclass = self.vnclass(vnclass)

        # Fall back to a placeholder when the class has no subclasses.
        names = [node.get('ID')
                 for node in vnclass.findall('SUBCLASSES/VNSUBCLASS')] or ['(none)']
        return textwrap.fill('Subclasses: ' + ' '.join(names), 70,
                             initial_indent=indent,
                             subsequent_indent=indent+'  ') 
Example 10
Project: razzy-spinner   Author: rafasashi   File: verbnet.py    GNU General Public License v3.0 6 votes vote down vote up
def pprint_members(self, vnclass, indent=''):
        """
        Return a string containing a pretty-printed representation of
        the given verbnet class's member verbs.

        :param vnclass: A verbnet class identifier; or an ElementTree
            containing the xml contents of a verbnet class.
        :param indent: prefix for the first line; continuation lines get
            two extra spaces.
        """
        if isinstance(vnclass, compat.string_types):
            vnclass = self.vnclass(vnclass)

        # Fall back to a placeholder when the class has no members.
        names = [node.get('name')
                 for node in vnclass.findall('MEMBERS/MEMBER')] or ['(none)']
        return textwrap.fill('Members: ' + ' '.join(names), 70,
                             initial_indent=indent,
                             subsequent_indent=indent+'  ') 
Example 11
Project: razzy-spinner   Author: rafasashi   File: verbnet.py    GNU General Public License v3.0 6 votes vote down vote up
def pprint_themroles(self, vnclass, indent=''):
        """
        Return a string containing a pretty-printed representation of
        the given verbnet class's thematic roles.

        Each role is written on its own line as ``* <type>``, followed by its
        selectional restrictions in square brackets when it has any.

        :param vnclass: A verbnet class identifier; or an ElementTree
            containing the xml contents of a verbnet class.
        :param indent: prefix placed before every line.
        """
        if isinstance(vnclass, compat.string_types):
            vnclass = self.vnclass(vnclass)

        def describe(role):
            text = indent + '* ' + role.get('type')
            restrictions = ['%(Value)s%(type)s' % restr.attrib
                            for restr in role.findall('SELRESTRS/SELRESTR')]
            if restrictions:
                text += '[%s]' % ' '.join(restrictions)
            return text

        return '\n'.join([describe(role)
                          for role in vnclass.findall('THEMROLES/THEMROLE')]) 
Example 12
Project: razzy-spinner   Author: rafasashi   File: timit.py    GNU General Public License v3.0 6 votes vote down vote up
def __init__(self, root, encoding='utf8'):
        """
        Construct a new TIMIT corpus reader in the given directory.

        :param root: The root directory for this corpus.
        :param encoding: Either an encoding name (string) or a value passed
            straight through to ``CorpusReader.__init__``.  A string is
            expanded into a list of (regexp, encoding) pairs in which
            ``.wav`` files map to None.
        """
        # Ensure that wave files don't get treated as unicode data:
        if isinstance(encoding, compat.string_types):
            # Raw string: '\.' is an invalid escape in a normal literal and
            # triggers a DeprecationWarning/SyntaxWarning on modern Python.
            encoding = [(r'.*\.wav', None), ('.*', encoding)]

        CorpusReader.__init__(self, root,
                              find_corpus_fileids(root, self._FILE_RE),
                              encoding=encoding)

        # Strip the last four characters from each matching fileid.
        # NOTE(review): presumably a 4-character file extension -- confirm
        # against _UTTERANCE_RE.
        self._utterances = [name[:-4] for name in
                            find_corpus_fileids(root, self._UTTERANCE_RE)]
        """A list of the utterance identifiers for all utterances in
        this corpus."""

        self._speakerinfo = None
        self._root = root
        # The speaker is the first '/'-separated component of an utterance id.
        self.speakers = sorted(set(u.split('/')[0] for u in self._utterances)) 
Example 13
Project: razzy-spinner   Author: rafasashi   File: api.py    GNU General Public License v3.0 6 votes vote down vote up
def fileids(self, categories=None):
        """
        Return a list of file identifiers for the files that make up
        this corpus, or that make up the given category(s) if specified.

        :param categories: None for every file, a single category name
            (string), or an iterable of category names.  An empty iterable
            yields an empty list.
        :raise ValueError: if a single category name is given and it is not
            a key of ``self._c2f``.
        :return: a sorted list of file identifiers (for ``categories=None``,
            whatever the parent class's ``fileids()`` returns).
        """
        if categories is None:
            return super(CategorizedCorpusReader, self).fileids()

        # self._init() is called once, while self._f2c is still None, before
        # self._c2f is read.
        if self._f2c is None:
            self._init()

        if isinstance(categories, compat.string_types):
            if categories in self._c2f:
                return sorted(self._c2f[categories])
            raise ValueError('Category %s not found' % categories)

        # set().union(...) instead of set.union(*...): the unbound form needs
        # at least one argument and raised TypeError for an empty collection.
        return sorted(set().union(*[self._c2f[c] for c in categories]))

######################################################################
#{ Treebank readers
######################################################################

#[xx] is it worth it to factor this out? 
Example 14
Project: razzy-spinner   Author: rafasashi   File: nkjp.py    GNU General Public License v3.0 6 votes vote down vote up
def __init__(self, root, fileids='.*'):
        """
        Corpus reader designed to work with National Corpus of Polish.
        See http://nkjp.pl/ for more details about NKJP.

        A string ``fileids`` gets ``'.*/header.xml'`` appended; for any other
        iterable, ``'/header.xml'`` is appended to every item.

        Usage example:
        import nltk
        import nkjp
        from nkjp import NKJPCorpusReader
        x = NKJPCorpusReader(root='/home/USER/nltk_data/corpora/nkjp/', fileids='') # obtain the whole corpus
        x.header()
        x.raw()
        x.words()
        x.tagged_words(tags=['subst', 'comp'])  #Link to find more tags: nkjp.pl/poliqarp/help/ense2.html
        x.sents()
        x = NKJPCorpusReader(root='/home/USER/nltk_data/corpora/nkjp/', fileids='Wilk*') # obtain particular file(s)
        x.header(fileids=['WilkDom', '/home/USER/nltk_data/corpora/nkjp/WilkWilczy'])
        x.tagged_words(fileids=['WilkDom', '/home/USER/nltk_data/corpora/nkjp/WilkWilczy'], tags=['subst', 'comp'])
        """
        if isinstance(fileids, compat.string_types):
            header_ids = fileids + '.*/header.xml'
        else:
            header_ids = [fileid + '/header.xml' for fileid in fileids]
        XMLCorpusReader.__init__(self, root, header_ids)
        self._paths = self.get_paths() 
Example 15
Project: razzy-spinner   Author: rafasashi   File: pl196x.py    GNU General Public License v3.0 6 votes vote down vote up
def _resolve(self, fileids, categories, textids=None):
		"""
		Turn one of the three mutually exclusive selectors into a
		``(fileids, textids)`` pair.

		:param fileids: file identifiers, returned unchanged.
		:param categories: categories, resolved through ``self.fileids``.
		:param textids: a text identifier (string) or a collection of them,
			resolved through ``self._t2f`` / ``self._f2t``.
		:raise ValueError: if more than one selector is given.
		:return: ``(fileids, None)`` for a fileids or categories selector;
			``(files, tdict)`` for a textids selector, where ``tdict`` maps
			each file to the set of requested text ids it contains; and
			``(None, None)`` when no selector is given.
		"""
		selectors = (fileids, categories, textids)
		if sum(1 for selector in selectors if selector is not None) > 1:
			raise ValueError('Specify only fileids, categories or textids')

		if fileids is not None:
			return fileids, None
		if categories is not None:
			return self.fileids(categories), None
		if textids is not None:
			if isinstance(textids, compat.string_types):
				textids = [textids]
			# Files are listed in textid order.
			# NOTE(review): a file that belongs to several requested texts
			# appears more than once, exactly as in the original sum(...).
			files = [f for t in textids for f in self._t2f[t]]
			wanted = set(textids)
			tdict = dict((f, set(self._f2t[f]) & wanted) for f in files)
			return files, tdict
		# Nothing requested: callers fall back to the whole corpus.
		return None, None 
Example 16
Project: razzy-spinner   Author: rafasashi   File: pl196x.py    GNU General Public License v3.0 6 votes vote down vote up
def words(self, fileids=None, categories=None, textids=None):
		"""
		Build one ``TEICorpusView`` per selected file and return their
		concatenation.

		The selection is resolved by ``self._resolve``; when that yields no
		fileids, ``self._fileids`` is used.
		"""
		fileids, textids = self._resolve(fileids, categories, textids)
		if fileids is None:
			fileids = self._fileids
		elif isinstance(fileids, compat.string_types):
			fileids = [fileids]

		# NOTE(review): the three positional flags are forwarded unchanged to
		# TEICorpusView; their meaning is defined there.
		flags = (False, False, False)
		views = []
		for fileid in fileids:
			path = self.abspath(fileid)
			if textids:
				view = TEICorpusView(path, *flags, headLen=self.headLen,
									textids=textids[fileid])
			else:
				view = TEICorpusView(path, *flags, headLen=self.headLen)
			views.append(view)
		return concat(views) 
Example 17
Project: razzy-spinner   Author: rafasashi   File: pl196x.py    GNU General Public License v3.0 6 votes vote down vote up
def sents(self, fileids=None, categories=None, textids=None):
		"""
		Build one ``TEICorpusView`` per selected file and return their
		concatenation.

		The selection is resolved by ``self._resolve``; when that yields no
		fileids, ``self._fileids`` is used.
		"""
		fileids, textids = self._resolve(fileids, categories, textids)
		if fileids is None:
			fileids = self._fileids
		elif isinstance(fileids, compat.string_types):
			fileids = [fileids]

		# NOTE(review): the three positional flags are forwarded unchanged to
		# TEICorpusView; their meaning is defined there.
		flags = (False, True, False)
		views = []
		for fileid in fileids:
			path = self.abspath(fileid)
			if textids:
				view = TEICorpusView(path, *flags, headLen=self.headLen,
									textids=textids[fileid])
			else:
				view = TEICorpusView(path, *flags, headLen=self.headLen)
			views.append(view)
		return concat(views) 
Example 18
Project: razzy-spinner   Author: rafasashi   File: pl196x.py    GNU General Public License v3.0 6 votes vote down vote up
def paras(self, fileids=None, categories=None, textids=None):
		"""
		Build one ``TEICorpusView`` per selected file and return their
		concatenation.

		The selection is resolved by ``self._resolve``; when that yields no
		fileids, ``self._fileids`` is used.
		"""
		fileids, textids = self._resolve(fileids, categories, textids)
		if fileids is None:
			fileids = self._fileids
		elif isinstance(fileids, compat.string_types):
			fileids = [fileids]

		# NOTE(review): the three positional flags are forwarded unchanged to
		# TEICorpusView; their meaning is defined there.
		flags = (False, True, True)
		views = []
		for fileid in fileids:
			path = self.abspath(fileid)
			if textids:
				view = TEICorpusView(path, *flags, headLen=self.headLen,
									textids=textids[fileid])
			else:
				view = TEICorpusView(path, *flags, headLen=self.headLen)
			views.append(view)
		return concat(views) 
Example 19
Project: razzy-spinner   Author: rafasashi   File: pl196x.py    GNU General Public License v3.0 6 votes vote down vote up
def tagged_words(self, fileids=None, categories=None, textids=None):
		"""
		Build one ``TEICorpusView`` per selected file and return their
		concatenation.

		The selection is resolved by ``self._resolve``; when that yields no
		fileids, ``self._fileids`` is used.
		"""
		fileids, textids = self._resolve(fileids, categories, textids)
		if fileids is None:
			fileids = self._fileids
		elif isinstance(fileids, compat.string_types):
			fileids = [fileids]

		# NOTE(review): the three positional flags are forwarded unchanged to
		# TEICorpusView; their meaning is defined there.
		flags = (True, False, False)
		views = []
		for fileid in fileids:
			path = self.abspath(fileid)
			if textids:
				view = TEICorpusView(path, *flags, headLen=self.headLen,
									textids=textids[fileid])
			else:
				view = TEICorpusView(path, *flags, headLen=self.headLen)
			views.append(view)
		return concat(views) 
Example 20
Project: razzy-spinner   Author: rafasashi   File: pl196x.py    GNU General Public License v3.0 6 votes vote down vote up
def tagged_sents(self, fileids=None, categories=None, textids=None):
		"""
		Build one ``TEICorpusView`` per selected file and return their
		concatenation.

		The selection is resolved by ``self._resolve``; when that yields no
		fileids, ``self._fileids`` is used.
		"""
		fileids, textids = self._resolve(fileids, categories, textids)
		if fileids is None:
			fileids = self._fileids
		elif isinstance(fileids, compat.string_types):
			fileids = [fileids]

		# NOTE(review): the three positional flags are forwarded unchanged to
		# TEICorpusView; their meaning is defined there.
		flags = (True, True, False)
		views = []
		for fileid in fileids:
			path = self.abspath(fileid)
			if textids:
				view = TEICorpusView(path, *flags, headLen=self.headLen,
									textids=textids[fileid])
			else:
				view = TEICorpusView(path, *flags, headLen=self.headLen)
			views.append(view)
		return concat(views) 
Example 21
Project: razzy-spinner   Author: rafasashi   File: framenet.py    GNU General Public License v3.0 6 votes vote down vote up
def _pretty_any(obj):

    """
    Helper function for pretty-printing any AttrDict object.

    Each key is written as ``[key] value`` on one line.  String values longer
    than 65 characters are instead placed on the lines after ``[key]``,
    formatted by ``_pretty_longstring`` with a two-space prefix.

    :param obj: The obj to be printed.
    :type obj: AttrDict
    :return: A nicely formatted string representation of the AttrDict object.
    :rtype: str
    """

    chunks = []
    for key in obj:
        value = obj[key]
        if isinstance(value, string_types) and len(value) > 65:
            chunks.append("[{0}]\n".format(key))
            chunks.append("{0}".format(_pretty_longstring(value, prefix='  ')))
            chunks.append('\n')
        else:
            chunks.append("[{0}] {1}\n".format(key, value))

    return "".join(chunks) 
Example 22
Project: razzy-spinner   Author: rafasashi   File: megam.py    GNU General Public License v3.0 6 votes vote down vote up
def call_megam(args):
    """
    Call the ``megam`` binary with the given arguments.

    :param args: a list of command-line arguments (a bare string is
        rejected).
    :raise TypeError: if ``args`` is a string.
    :raise OSError: if the process exits with a non-zero return code.
    :return: the captured standard output, decoded as UTF-8 when it is
        not already a string.
    """
    if isinstance(args, compat.string_types):
        raise TypeError('args should be a list of strings')
    if _megam_bin is None:
        config_megam()

    # Call megam via a subprocess
    proc = subprocess.Popen([_megam_bin] + args, stdout=subprocess.PIPE)
    out, err = proc.communicate()

    # Check the return code.
    if proc.returncode != 0:
        print()
        # NOTE(review): stderr is not piped above, so ``err`` is None here.
        print(err)
        raise OSError('megam command failed!')

    return out if isinstance(out, compat.string_types) else out.decode('utf-8') 
Example 23
Project: razzy-spinner   Author: rafasashi   File: tadm.py    GNU General Public License v3.0 6 votes vote down vote up
def call_tadm(args):
    """
    Call the ``tadm`` binary with the given arguments.

    The child process writes directly to ``sys.stdout``.

    :param args: a list of command-line arguments (a bare string is
        rejected).
    :raise TypeError: if ``args`` is a string.
    :raise OSError: if the process exits with a non-zero return code.
    """
    if isinstance(args, compat.string_types):
        raise TypeError('args should be a list of strings')
    if _tadm_bin is None:
        config_tadm()

    # Call tadm via a subprocess
    proc = subprocess.Popen([_tadm_bin] + args, stdout=sys.stdout)
    out, err = proc.communicate()

    # Check the return code.
    if proc.returncode == 0:
        return
    print()
    # NOTE(review): stderr is not piped above, so ``err`` is None here.
    print(err)
    raise OSError('tadm command failed!') 
Example 24
Project: razzy-spinner   Author: rafasashi   File: internals.py    GNU General Public License v3.0 6 votes vote down vote up
def config_java(bin=None, options=None, verbose=True):
    """
    Configure nltk's java interface, by letting nltk know where it can
    find the Java binary, and what extra options (if any) should be
    passed to Java when it is run.

    :param bin: The full path to the Java binary.  If not specified,
        then nltk will search the system for a Java binary; and if
        one is not found, it will raise a ``LookupError`` exception.
    :type bin: str
    :param options: A list of options that should be passed to the
        Java binary when it is called.  A common value is
        ``'-Xmx512m'``, which tells Java binary to increase
        the maximum heap size to 512 megabytes.  A single string is
        split on whitespace.  If no options are specified, then do
        not modify the options list.
    :type options: list(str)
    :param verbose: forwarded to ``find_binary``.
    """
    global _java_bin, _java_options
    _java_bin = find_binary('java', bin, env_vars=['JAVAHOME', 'JAVA_HOME'], verbose=verbose, binary_names=['java.exe'])

    if options is None:
        return
    if isinstance(options, compat.string_types):
        options = options.split()
    # Store a copy so later changes to the caller's list have no effect.
    _java_options = list(options) 
Example 25
Project: razzy-spinner   Author: rafasashi   File: chat80.py    GNU General Public License v3.0 6 votes vote down vote up
def concepts(items = items):
    """
    Build a list of concepts corresponding to the relation names in ``items``.

    :param items: names of the Chat-80 relations to extract; a single name
        may be given as a bare string
    :type items: list of strings
    :return: the ``Concept`` objects which are extracted from the relations
    :rtype: list
    """
    if isinstance(items, string_types): items = (items,)

    rels = [item_metadata[r] for r in items]

    concept_map = process_bundle(rels)
    # Materialise the values: on Python 3 ``dict.values()`` is a view, not
    # the list this function documents as its return type.
    return list(concept_map.values())




########################################################################### 
Example 26
Project: razzy-spinner   Author: rafasashi   File: drt.py    GNU General Public License v3.0 6 votes vote down vote up
def _draw_command(self, item, x, y):
        """
        Draw the given item at the given location

        A string is drawn as text anchored at its upper-left ('nw') corner.
        A tuple is taken as the lower-right corner of a box, which is drawn
        with a horizontal line below the text height plus twice
        ``self.BUFFER``.  Any other item is not drawn.

        :param item: the item to draw
        :param x: the first canvas coordinate of the upper-left corner
        :param y: the second canvas coordinate of the upper-left corner
        :return: the result of ``self._visit_command(item, x, y)``
            (described by the original author as the bottom-rightmost point)
        """
        canvas = self.canvas
        if isinstance(item, string_types):
            canvas.create_text(x, y, anchor='nw', font=canvas.font, text=item)
        elif isinstance(item, tuple):
            # item is the lower-right of a box
            right, bottom = item
            canvas.create_rectangle(x, y, right, bottom)
            # the line separating refs from conds
            divider_y = y + self._get_text_height() + (self.BUFFER * 2)
            canvas.create_line(x, divider_y, right, divider_y)

        return self._visit_command(item, x, y) 
Example 27
Project: razzy-spinner   Author: rafasashi   File: glue.py    GNU General Public License v3.0 6 votes vote down vote up
def __init__(self, meaning, glue, indices=None):
        """
        Store the meaning term, the glue term and the index set.

        :param meaning: an ``Expression``, or a string parsed with
            ``Expression.fromstring``.
        :param glue: a ``linearlogic.Expression``, or a string parsed with
            ``linearlogic.LinearLogicParser().parse``.
        :param indices: the index set; any falsy value is replaced by a new
            empty set.
        :raise RuntimeError: if ``meaning`` or ``glue`` is neither a string
            nor an expression of the expected class.
        """
        if isinstance(meaning, string_types):
            self.meaning = Expression.fromstring(meaning)
        elif not isinstance(meaning, Expression):
            raise RuntimeError('Meaning term neither string or expression: %s, %s' % (meaning, meaning.__class__))
        else:
            self.meaning = meaning

        if isinstance(glue, string_types):
            self.glue = linearlogic.LinearLogicParser().parse(glue)
        elif not isinstance(glue, linearlogic.Expression):
            raise RuntimeError('Glue term neither string or expression: %s, %s' % (glue, glue.__class__))
        else:
            self.glue = glue

        self.indices = indices if indices else set() 
Example 28
Project: razzy-spinner   Author: rafasashi   File: glue.py    GNU General Public License v3.0 6 votes vote down vote up
def __init__(self, meaning, glue, indices=None):
        """
        Store the DRT meaning term, the glue term and the index set.

        :param meaning: a ``drt.DrtExpression``, or a string parsed with
            ``drt.DrtExpression.fromstring``.
        :param glue: a ``linearlogic.Expression``, or a string parsed with
            ``linearlogic.LinearLogicParser().parse``.
        :param indices: the index set; any falsy value is replaced by a new
            empty set.
        :raise RuntimeError: if ``meaning`` or ``glue`` is neither a string
            nor an expression of the expected class.
        """
        if isinstance(meaning, string_types):
            self.meaning = drt.DrtExpression.fromstring(meaning)
        elif not isinstance(meaning, drt.DrtExpression):
            raise RuntimeError('Meaning term neither string or expression: %s, %s' % (meaning, meaning.__class__))
        else:
            self.meaning = meaning

        if isinstance(glue, string_types):
            self.glue = linearlogic.LinearLogicParser().parse(glue)
        elif not isinstance(glue, linearlogic.Expression):
            raise RuntimeError('Glue term neither string or expression: %s, %s' % (glue, glue.__class__))
        else:
            self.glue = glue

        self.indices = indices if indices else set() 
Example 29
Project: razzy-spinner   Author: rafasashi   File: evaluate.py    GNU General Public License v3.0 6 votes vote down vote up
def set2rel(s):
    """
    Convert a set containing individuals (strings or numbers) into a set of
    unary tuples. Any tuples of strings already in the set are passed through
    unchanged.

    For example:
      - set(['a', 'b']) => set([('a',), ('b',)])
      - set([3, 27]) => set([('3',), ('27',)])

    :type s: set
    :rtype: set of tuple of str
    """
    new = set()
    for elem in s:
        if isinstance(elem, string_types):
            new.add((elem,))
        elif isinstance(elem, int):
            # The trailing comma belongs to the tuple, not to the str() call:
            # ``(str(elem,))`` is just the string, not a 1-tuple.
            new.add((str(elem),))
        else:
            new.add(elem)
    return new 
Example 30
Project: OpenBottle   Author: xiaozhuchacha   File: tree.py    MIT License 6 votes vote down vote up
def productions(self):
        """
        Return the grammar productions licensed by this tree.

        Every subtree of the form (P: C1 C2 ... Cn) contributes one production
        of the form P -> C1 C2 ... Cn.  The production for this node comes
        first, followed by the productions of each ``Tree`` child in order.

            >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
            >>> t.productions()
            [S -> NP VP, NP -> D N, D -> 'the', N -> 'dog', VP -> V NP, V -> 'chased',
            NP -> D N, D -> 'the', N -> 'cat']

        :raise TypeError: if the label of this node is not a string.
        :rtype: list(Production)
        """
        label = self._label
        if not isinstance(label, string_types):
            raise TypeError('Productions can only be generated from trees having node labels that are strings')

        result = [Production(Nonterminal(label), _child_names(self))]
        # Leaves contribute nothing; only Tree children are recursed into.
        subtrees = (kid for kid in self if isinstance(kid, Tree))
        for subtree in subtrees:
            result.extend(subtree.productions())
        return result 
Example 31
Project: OpenBottle   Author: xiaozhuchacha   File: tree.py    MIT License 6 votes vote down vote up
def _pformat_flat(self, nodesep, parens, quotes):
        """
        Format this tree on a single line.

        :param nodesep: text written between the node label and the children.
        :param parens: a two-item sequence; item 0 opens the bracketing and
            item 1 closes it.
        :param quotes: when false, string leaves are written verbatim;
            otherwise they are passed through ``unicode_repr``.
        :return: the one-line rendering of this tree.
        """
        def render(kid):
            # Subtrees recurse, tuple leaves are joined with '/', and
            # everything else falls back to unicode_repr.
            if isinstance(kid, Tree):
                return kid._pformat_flat(nodesep, parens, quotes)
            if isinstance(kid, tuple):
                return "/".join(kid)
            if isinstance(kid, string_types) and not quotes:
                return '%s' % kid
            return unicode_repr(kid)

        body = " ".join([render(kid) for kid in self])
        label = self._label
        if not isinstance(label, string_types):
            label = unicode_repr(label)
        return '%s%s%s %s%s' % (parens[0], label, nodesep, body, parens[1]) 
Example 32
Project: razzy-spinner   Author: rafasashi   File: tree.py    GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, node, children=None):
        """
        Initialise the underlying list with ``children`` and store ``node``
        as the label.

        :param node: the label of this node.
        :param children: the child list; it is required and must not be a
            string.
        :raise TypeError: if ``children`` is None or a string.
        """
        if children is None:
            raise TypeError("%s: Expected a node value and child list "
                                % type(self).__name__)
        # A string is iterable, so it would otherwise become a list of
        # characters.
        if isinstance(children, string_types):
            raise TypeError("%s() argument 2 should be a list, not a "
                            "string" % type(self).__name__)
        list.__init__(self, children)
        self._label = node

    #////////////////////////////////////////////////////////////
    # Comparison operators
    #//////////////////////////////////////////////////////////// 
Example 33
Project: razzy-spinner   Author: rafasashi   File: tree.py    GNU General Public License v3.0 5 votes vote down vote up
def pformat(self, margin=70, indent=0, nodesep='', parens='()', quotes=False):
        """
        :return: A pretty-printed string representation of this tree.
        :rtype: str
        :param margin: The right margin at which to do line-wrapping.
        :type margin: int
        :param indent: The indentation level at which printing
            begins.  This number is used to decide how far to indent
            subsequent lines.
        :type indent: int
        :param nodesep: A string that is used to separate the node
            from the children (empty by default).  E.g., the value
            ``':'`` gives trees like
            ``(S: (NP: I) (VP: (V: saw) (NP: it)))``.
        :param parens: A two-item sequence holding the opening and the
            closing bracket.
        :param quotes: When false, string leaves are written verbatim;
            otherwise they are passed through ``unicode_repr``.
        """

        # Try writing it on one line.
        flat = self._pformat_flat(nodesep, parens, quotes)
        if len(flat) + indent < margin:
            return flat

        # If it doesn't fit on one line, then write it on multi-lines.
        label = self._label
        if not isinstance(label, string_types):
            label = unicode_repr(label)
        pieces = ['%s%s%s' % (parens[0], label, nodesep)]

        # Every child starts on its own line, two columns deeper.
        pad = '\n' + ' ' * (indent + 2)
        for child in self:
            if isinstance(child, Tree):
                rendered = child.pformat(margin, indent+2,
                                         nodesep, parens, quotes)
            elif isinstance(child, tuple):
                rendered = "/".join(child)
            elif isinstance(child, string_types) and not quotes:
                rendered = '%s' % child
            else:
                rendered = unicode_repr(child)
            pieces.append(pad + rendered)
        pieces.append(parens[1])
        return ''.join(pieces) 
Example 34
Project: razzy-spinner   Author: rafasashi   File: punkt.py    GNU General Public License v3.0 5 votes vote down vote up
def train(self, train_text, verbose=False):
        """
        Derives parameters from a given training text, or uses the parameters
        given. Repeated calls to this method destroy previous parameters. For
        incremental training, instantiate a separate PunktTrainer instance.

        A non-string ``train_text`` is returned unchanged; a string is fed
        to a new ``PunktTrainer`` and its parameters are returned.
        ``verbose`` is accepted but not used here.
        """
        if isinstance(train_text, string_types):
            trainer = PunktTrainer(train_text, lang_vars=self._lang_vars,
                                   token_cls=self._Token)
            return trainer.get_params()
        # Anything that is not a string is handed back as-is.
        return train_text

    #////////////////////////////////////////////////////////////
    #{ Tokenization
    #//////////////////////////////////////////////////////////// 
Example 35
Project: razzy-spinner   Author: rafasashi   File: data.py    GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, zipfile, entry=''):
        """
        Create a new path pointer pointing at the specified entry
        in the given zipfile.

        :param zipfile: Either a zipfile object or a string; a string is
            made absolute and wrapped in an ``OpenOnDemandZipFile``.
        :param entry: Name of the entry inside the zipfile; it is
            normalized and stripped of leading slashes.
        :raise IOError: If the given zipfile does not exist, or if it
        does not contain the specified entry.
        """
        if isinstance(zipfile, string_types):
            zipfile = OpenOnDemandZipFile(os.path.abspath(zipfile))

        # Entries are always relative, so drop any leading slash.
        entry = normalize_resource_name(entry, True, '/').lstrip('/')

        if entry:
            try:
                zipfile.getinfo(entry)
            except Exception:
                # Directories are not always listed explicitly in a zip
                # file, so a directory name is also accepted when at
                # least one archived name lives underneath it.
                is_listed_dir = entry.endswith('/') and any(
                    name.startswith(entry) for name in zipfile.namelist())
                if not is_listed_dir:
                    raise IOError('Zipfile %r does not contain %r' %
                                  (zipfile.filename, entry))
        self._zipfile = zipfile
        self._entry = entry
Example 36
Project: razzy-spinner   Author: rafasashi   File: data.py    GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, filename):
        """
        Initialize the zip file from ``filename`` and close it right away.

        :param filename: Path of the zip file; must be a string.
        :raise TypeError: If ``filename`` is not a string.
        """
        filename_is_string = isinstance(filename, string_types)
        if not filename_is_string:
            raise TypeError('ReopenableZipFile filename must be a string')
        zipfile.ZipFile.__init__(self, filename)
        # Sanity check: the base class must have recorded the same name.
        assert self.filename == filename
        self.close()
Example 37
Project: razzy-spinner   Author: rafasashi   File: downloader.py    GNU General Public License v3.0 5 votes vote down vote up
def fromxml(xml):
        """
        Build a ``Package`` from an XML element.

        :param xml: Either an element exposing ``attrib``, or a string
            that is handed to ``ElementTree.parse()``.
        :return: A ``Package`` constructed from the element's attributes.
        """
        if isinstance(xml, compat.string_types):
            # ElementTree.parse() returns an ElementTree wrapper, which has
            # no ``attrib``; the attributes live on its root element.
            xml = ElementTree.parse(xml).getroot()
        # Normalize every attribute value to text (updates ``xml`` in place).
        for key in xml.attrib:
            xml.attrib[key] = compat.text_type(xml.attrib[key])
        return Package(**xml.attrib)
Example 38
Project: razzy-spinner   Author: rafasashi   File: downloader.py    GNU General Public License v3.0 5 votes vote down vote up
def fromxml(xml):
        """
        Build a ``Collection`` from an XML element.

        :param xml: Either an element exposing ``attrib``, or a string
            that is handed to ``ElementTree.parse()``.
        :return: A ``Collection`` whose ``children`` are the ``ref``
            attributes of the ``item`` sub-elements and whose remaining
            keyword arguments are the element's attributes.
        """
        if isinstance(xml, compat.string_types):
            # ElementTree.parse() returns an ElementTree wrapper, which has
            # no ``attrib``; the attributes live on its root element.
            xml = ElementTree.parse(xml).getroot()
        # Normalize every attribute value to text (updates ``xml`` in place).
        for key in xml.attrib:
            xml.attrib[key] = compat.text_type(xml.attrib[key])
        children = [child.get('ref') for child in xml.findall('item')]
        return Collection(children=children, **xml.attrib)
Example 39
Project: razzy-spinner   Author: rafasashi   File: downloader.py    GNU General Public License v3.0 5 votes vote down vote up
def _info_or_id(self, info_or_id):
        """
        Resolve ``info_or_id``: a string is looked up with ``self.info()``;
        any other value is returned unchanged.
        """
        if not isinstance(info_or_id, compat.string_types):
            return info_or_id
        return self.info(info_or_id)

    # [xx] When during downloading is it 'safe' to abort?  Only unsafe
    # time is *during* an unzip -- we don't want to leave a
    # partially-unzipped corpus in place because we wouldn't notice
    # it.  But if we had the exact total size of the unzipped corpus,
    # then that would be fine.  Then we could abort anytime we want!
    # So this is really what we should do.  That way the threaded
    # downloader in the gui can just kill the download thread anytime
    # it wants. 
Example 40
Project: razzy-spinner   Author: rafasashi   File: downloader.py    GNU General Public License v3.0 5 votes vote down vote up
def md5_hexdigest(file):
    """
    Calculate and return the MD5 checksum for a given file.
    ``file`` may either be a filename or an open stream.
    """
    # Open streams are digested directly.
    if not isinstance(file, compat.string_types):
        return _md5_hexdigest(file)
    # A filename is opened in binary mode for the duration of the digest.
    with open(file, 'rb') as stream:
        return _md5_hexdigest(stream)
Example 41
Project: razzy-spinner   Author: rafasashi   File: regexp.py    GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, grammar, root_label='S', loop=1, trace=0):
        """
        Create a new chunk parser, from the given start state
        and set of chunk patterns.

        :param grammar: The grammar, or a list of RegexpChunkParser objects
        :type grammar: str or list(RegexpChunkParser)
        :param root_label: The top node of the tree being created
        :type root_label: str or Nonterminal
        :param loop: The number of times to run through the patterns
        :type loop: int
        :type trace: int
        :param trace: The level of tracing that should be used when
            parsing a text.  ``0`` will generate no tracing output;
            ``1`` will generate normal tracing output; and ``2`` or
            higher will generate verbose tracing output.
        :raise TypeError: If ``grammar`` is neither a string nor an
            iterable consisting solely of RegexpChunkParser objects.
        """
        self._trace = trace
        self._stages = []
        self._grammar = grammar
        self._loop = loop

        if isinstance(grammar, string_types):
            self._read_grammar(grammar, root_label, trace)
        else:
            # Make sure the grammar looks like it has the right type:
            type_err = ('Expected string or list of RegexpChunkParsers '
                        'for the grammar.')
            try:
                grammar = list(grammar)
            except Exception:
                # Only ordinary errors are translated; KeyboardInterrupt
                # and SystemExit must propagate untouched.
                raise TypeError(type_err)
            for elt in grammar:
                if not isinstance(elt, RegexpChunkParser):
                    raise TypeError(type_err)
            self._stages = grammar
Example 42
Project: razzy-spinner   Author: rafasashi   File: knbc.py    GNU General Public License v3.0 5 votes vote down vote up
def test():
    """Smoke-test the element types returned by the KNB corpus reader."""
    from nltk.corpus.util import LazyCorpusLoader

    knbc = LazyCorpusLoader(
        'knbc/corpus1', KNBCorpusReader, r'.*/KN.*', encoding='euc-jp')

    # Words are strings, tagged words are tuples.
    first_word = knbc.words()[0]
    assert isinstance(first_word, string_types)
    first_sent_word = knbc.sents()[0][0]
    assert isinstance(first_sent_word, string_types)
    first_tagged_word = knbc.tagged_words()[0]
    assert isinstance(first_tagged_word, tuple)
    first_tagged_sent_word = knbc.tagged_sents()[0][0]
    assert isinstance(first_tagged_sent_word, tuple)
Example 43
Project: razzy-spinner   Author: rafasashi   File: util.py    GNU General Public License v3.0 5 votes vote down vote up
def write(cls, sequence, output_file):
        """
        Pickle every item of ``sequence`` to ``output_file``, one
        ``pickle.dump`` call per item, using ``cls.PROTOCOL``.

        :param sequence: Iterable of objects to pickle.
        :param output_file: Either a writable binary stream, or a string
            naming a file that is opened in ``'wb'`` mode.  A file opened
            here is closed before returning; a stream supplied by the
            caller is left open.
        """
        opened_here = isinstance(output_file, string_types)
        if opened_here:
            output_file = open(output_file, 'wb')
        try:
            for item in sequence:
                pickle.dump(item, output_file, cls.PROTOCOL)
        finally:
            # Only release the handle this method created itself.
            if opened_here:
                output_file.close()
Example 44
Project: razzy-spinner   Author: rafasashi   File: senseval.py    GNU General Public License v3.0 5 votes vote down vote up
def raw(self, fileids=None):
        """
        :return: the text contents of the given fileids, as a single string.
        """
        # None selects every file of the corpus; a lone id becomes a list.
        if fileids is None:
            targets = self._fileids
        elif isinstance(fileids, compat.string_types):
            targets = [fileids]
        else:
            targets = fileids
        return concat([self.open(target).read() for target in targets])
Example 45
Project: razzy-spinner   Author: rafasashi   File: plaintext.py    GNU General Public License v3.0 5 votes vote down vote up
def raw(self, fileids=None):
        """
        :return: the given file(s) as a single string.
        :rtype: str
        """
        # None selects every file of the corpus; a lone id becomes a list.
        if fileids is None:
            targets = self._fileids
        elif isinstance(fileids, string_types):
            targets = [fileids]
        else:
            targets = fileids
        return concat([self.open(target).read() for target in targets])
Example 46
Project: razzy-spinner   Author: rafasashi   File: nombank.py    GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, root, nomfile, framefiles='',
                 nounsfile=None, parse_fileid_xform=None,
                 parse_corpus=None, encoding='utf8'):
        """
        :param root: The root directory for this corpus.
        :param nomfile: The name of the file containing the predicate-
            argument annotations (relative to ``root``).
        :param framefiles: A list or regexp specifying the frameset
            fileids for this corpus.
        :param nounsfile: Registered with the corpus reader next to
            ``nomfile`` and stored on the instance.
        :param parse_fileid_xform: A transform that should be applied
            to the fileids in this corpus.  This should be a function
            of one argument (a fileid) that returns a string (the new
            fileid).
        :param parse_corpus: The corpus containing the parse trees
            corresponding to this corpus.  These parse trees are
            necessary to resolve the tree pointers used by nombank.
        :param encoding: Passed through to ``CorpusReader.__init__``.
        """
        # A string is treated as a regexp and expanded into fileids.
        if isinstance(framefiles, string_types):
            frame_fileids = find_corpus_fileids(root, framefiles)
        else:
            frame_fileids = framefiles
        frame_fileids = list(frame_fileids)

        # Initialize the corpus reader.
        all_fileids = [nomfile, nounsfile] + frame_fileids
        CorpusReader.__init__(self, root, all_fileids, encoding)

        # Record our frame fileids & nom file.
        self._nomfile = nomfile
        self._framefiles = frame_fileids
        self._nounsfile = nounsfile
        self._parse_fileid_xform = parse_fileid_xform
        self._parse_corpus = parse_corpus
Example 47
Project: razzy-spinner   Author: rafasashi   File: nombank.py    GNU General Public License v3.0 5 votes vote down vote up
def raw(self, fileids=None):
        """
        :return: the text contents of the given fileids, as a single string.
        """
        # None selects every file of the corpus; a lone id becomes a list.
        if fileids is None:
            targets = self._fileids
        elif isinstance(fileids, compat.string_types):
            targets = [fileids]
        else:
            targets = fileids
        return concat([self.open(target).read() for target in targets])
Example 48
Project: razzy-spinner   Author: rafasashi   File: ycoe.py    GNU General Public License v3.0 5 votes vote down vote up
def fileids(self, documents=None):
        """
        Return a list of file identifiers for the files that make up
        this corpus, or that store the given document(s) if specified.

        Each requested document contributes a ``.pos`` and a ``.psd``
        identifier; the result is sorted and free of duplicates.
        """
        if documents is None:
            return self._fileids
        if isinstance(documents, compat.string_types):
            documents = [documents]
        pos_names = ['%s.pos' % doc for doc in documents]
        psd_names = ['%s.psd' % doc for doc in documents]
        return sorted(set(pos_names + psd_names))
Example 49
Project: razzy-spinner   Author: rafasashi   File: conll.py    GNU General Public License v3.0 5 votes vote down vote up
def raw(self, fileids=None):
        """
        Return the ``concat`` of the contents read from each requested
        file.  ``fileids`` may be None (all files), one id, or several.
        """
        if fileids is None:
            targets = self._fileids
        elif isinstance(fileids, compat.string_types):
            targets = [fileids]
        else:
            targets = fileids
        return concat([self.open(target).read() for target in targets])
Example 50
Project: razzy-spinner   Author: rafasashi   File: conll.py    GNU General Public License v3.0 5 votes vote down vote up
def _get_parsed_sent(self, grid, pos_in_tree, tagset=None):
        """
        Build a tree for one sentence from the words, POS and tree
        columns of ``grid``.

        :param grid: The sentence grid read through ``self._get_column``.
        :param pos_in_tree: If false, every one-child subtree whose child
            is a string is replaced by a ``(word, label)`` tuple.
        :param tagset: If given and different from ``self._tagset``, POS
            tags are converted with ``map_tag``.
        :return: The parsed tree.
        """
        words = self._get_column(grid, self._colmap['words'])
        pos_tags = self._get_column(grid, self._colmap['pos'])
        if tagset and tagset != self._tagset:
            pos_tags = [map_tag(self._tagset, tagset, tag) for tag in pos_tags]
        parse_tags = self._get_column(grid, self._colmap['tree'])

        pieces = []
        for word, pos_tag, parse_tag in zip(words, pos_tags, parse_tags):
            # Literal brackets would clash with the tree bracketing.
            if word == '(':
                word = '-LRB-'
            elif word == ')':
                word = '-RRB-'
            if pos_tag == '(':
                pos_tag = '-LRB-'
            elif pos_tag == ')':
                pos_tag = '-RRB-'
            opening, closing = parse_tag.split('*')
            closing = ')' * closing.count(')')  # only keep ')'.
            pieces.append('%s (%s %s) %s' % (opening, pos_tag, word, closing))
        treestr = ''.join(pieces)

        try:
            tree = self._tree_class.parse(treestr)
        except (ValueError, IndexError):
            # Retry with the whole string wrapped under the root label.
            tree = self._tree_class.parse('(%s %s)' %
                                          (self._root_label, treestr))

        if not pos_in_tree:
            for subtree in tree.subtrees():
                for index, child in enumerate(subtree):
                    if not isinstance(child, Tree) or len(child) != 1:
                        continue
                    if isinstance(child[0], compat.string_types):
                        subtree[index] = (child[0], child.label())

        return tree
Example 51
Project: razzy-spinner   Author: rafasashi   File: chasen.py    GNU General Public License v3.0 5 votes vote down vote up
def raw(self, fileids=None):
        """
        Return the ``concat`` of the contents read from each requested
        file.  ``fileids`` may be None (all files), one id, or several.
        """
        if fileids is None:
            targets = self._fileids
        elif isinstance(fileids, compat.string_types):
            targets = [fileids]
        else:
            targets = fileids
        return concat([self.open(target).read() for target in targets])
Example 52
Project: razzy-spinner   Author: rafasashi   File: chasen.py    GNU General Public License v3.0 5 votes vote down vote up
def test():
    """Smoke-test that the tag of the first tagged JEITA word is a string."""
    from nltk.corpus.util import LazyCorpusLoader

    jeita = LazyCorpusLoader(
        'jeita', ChasenCorpusReader, r'.*chasen', encoding='utf-8')

    first_tagged_word = jeita.tagged_words()[0]
    assert isinstance(first_tagged_word[1], compat.string_types)
Example 53
Project: razzy-spinner   Author: rafasashi   File: mte.py    GNU General Public License v3.0 5 votes vote down vote up
def __fileids(self, fileids):
        """
        Normalize a user-supplied file selection.

        :param fileids: None (all files of the corpus), one file id, or
            an iterable of file ids.
        :return: A list of the requested ids that belong to this corpus,
            minus the source files that are excluded below.  A message is
            printed when nothing is left.
        """
        if fileids is None: fileids = self._fileids
        elif isinstance(fileids, compat.string_types): fileids = [fileids]
        # multext-east sourcefiles that are not compatible to the teip5
        # specification
        incompatible = ("oana-bg.xml", "oana-mk.xml")
        # Build a real list: on Python 3 ``filter()`` returns an iterator
        # that is always truthy, so the emptiness check below never fired.
        fileids = [f for f in fileids
                   if f in self._fileids and f not in incompatible]
        if not fileids:
            print("No valid multext-east file specified")
        return fileids
Example 54
Project: razzy-spinner   Author: rafasashi   File: timit.py    GNU General Public License v3.0 5 votes vote down vote up
def utteranceids(self, dialect=None, sex=None, spkrid=None,
                   sent_type=None, sentid=None):
        """
        :return: A list of the utterance identifiers for all
        utterances in this corpus, or for the given speaker, dialect
        region, gender, sentence type, or sentence number, if
        specified.

        Each criterion may be a single string or a collection of
        strings; criteria left as None are not applied.
        """
        # Wrap lone strings so that ``in`` below tests membership in a
        # list rather than substring containment.
        if isinstance(dialect, compat.string_types): dialect = [dialect]
        if isinstance(sex, compat.string_types): sex = [sex]
        if isinstance(spkrid, compat.string_types): spkrid = [spkrid]
        if isinstance(sent_type, compat.string_types): sent_type = [sent_type]
        if isinstance(sentid, compat.string_types): sentid = [sentid]

        # Each filter looks at a fixed slice of the utterance id.
        utterances = self._utterances[:]
        if dialect is not None:
            utterances = [u for u in utterances if u[2] in dialect]
        if sex is not None:
            utterances = [u for u in utterances if u[4] in sex]
        if spkrid is not None:
            utterances = [u for u in utterances if u[:9] in spkrid]
        if sent_type is not None:
            utterances = [u for u in utterances if u[11] in sent_type]
        if sentid is not None:
            # Compare against ``sentid`` itself; testing ``spkrid`` here
            # filtered on the wrong criterion (or failed when it was None).
            utterances = [u for u in utterances if u[10:] in sentid]
        return utterances
Example 55
Project: razzy-spinner   Author: rafasashi   File: timit.py    GNU General Public License v3.0 5 votes vote down vote up
def _utterance_fileids(self, utterances, extension):
        """
        Append ``extension`` to each utterance id.

        :param utterances: None (use ``self._utterances``), one id, or
            several ids.
        :return: A list of ``'%s%s' % (utterance, extension)`` strings.
        """
        selected = self._utterances if utterances is None else utterances
        if isinstance(selected, compat.string_types):
            selected = [selected]
        return ['%s%s' % (utt, extension) for utt in selected]
Example 56
Project: razzy-spinner   Author: rafasashi   File: propbank.py    GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, root, propfile, framefiles='',
                 verbsfile=None, parse_fileid_xform=None,
                 parse_corpus=None, encoding='utf8'):
        """
        :param root: The root directory for this corpus.
        :param propfile: The name of the file containing the predicate-
            argument annotations (relative to ``root``).
        :param framefiles: A list or regexp specifying the frameset
            fileids for this corpus.
        :param verbsfile: Registered with the corpus reader next to
            ``propfile`` and stored on the instance.
        :param parse_fileid_xform: A transform that should be applied
            to the fileids in this corpus.  This should be a function
            of one argument (a fileid) that returns a string (the new
            fileid).
        :param parse_corpus: The corpus containing the parse trees
            corresponding to this corpus.  These parse trees are
            necessary to resolve the tree pointers used by propbank.
        :param encoding: Passed through to ``CorpusReader.__init__``.
        """
        # A string is treated as a regexp and expanded into fileids.
        if isinstance(framefiles, compat.string_types):
            frame_fileids = find_corpus_fileids(root, framefiles)
        else:
            frame_fileids = framefiles
        frame_fileids = list(frame_fileids)

        # Initialize the corpus reader.
        all_fileids = [propfile, verbsfile] + frame_fileids
        CorpusReader.__init__(self, root, all_fileids, encoding)

        # Record our frame fileids & prop file.
        self._propfile = propfile
        self._framefiles = frame_fileids
        self._verbsfile = verbsfile
        self._parse_fileid_xform = parse_fileid_xform
        self._parse_corpus = parse_corpus
Example 57
Project: razzy-spinner   Author: rafasashi   File: propbank.py    GNU General Public License v3.0 5 votes vote down vote up
def raw(self, fileids=None):
        """
        :return: the text contents of the given fileids, as a single string.
        """
        # None selects every file of the corpus; a lone id becomes a list.
        if fileids is None:
            targets = self._fileids
        elif isinstance(fileids, compat.string_types):
            targets = [fileids]
        else:
            targets = fileids
        return concat([self.open(target).read() for target in targets])
Example 58
Project: razzy-spinner   Author: rafasashi   File: propbank.py    GNU General Public License v3.0 5 votes vote down vote up
def parse(s):
        """
        Build a ``PropbankInflection`` from a five-character string.

        :raise TypeError: If ``s`` is not a string.
        :raise ValueError: If ``s`` is not exactly five characters long
            or does not match ``PropbankInflection._VALIDATE``.
        """
        if not isinstance(s, compat.string_types):
            raise TypeError('expected a string')
        well_formed = len(s) == 5 and PropbankInflection._VALIDATE.match(s)
        if not well_formed:
            raise ValueError('Bad propbank inflection string %r' % s)
        # Each character becomes one positional constructor argument.
        return PropbankInflection(*s)
Example 59
Project: razzy-spinner   Author: rafasashi   File: toolbox.py    GNU General Public License v3.0 5 votes vote down vote up
def raw(self, fileids=None):
        """
        Return the ``concat`` of the contents read from each requested
        file.

        :param fileids: None (all files of the corpus), one file id, or
            several file ids.  Defaults to None, since the body already
            handles that value.
        """
        if fileids is None:
            fileids = self._fileids
        elif isinstance(fileids, compat.string_types):
            fileids = [fileids]
        return concat([self.open(f).read() for f in fileids])
Example 60
Project: razzy-spinner   Author: rafasashi   File: xmldocs.py    GNU General Public License v3.0 5 votes vote down vote up
def raw(self, fileids=None):
        """
        Return the ``concat`` of the contents read from each requested
        file.  ``fileids`` may be None (all files), one id, or several.
        """
        if fileids is None:
            targets = self._fileids
        elif isinstance(fileids, compat.string_types):
            targets = [fileids]
        else:
            targets = fileids
        return concat([self.open(target).read() for target in targets])
Example 61
Project: razzy-spinner   Author: rafasashi   File: chunked.py    GNU General Public License v3.0 5 votes vote down vote up
def raw(self, fileids=None):
        """
        :return: the given file(s) as a single string.
        :rtype: str
        """
        # None selects every file of the corpus; a lone id becomes a list.
        if fileids is None:
            targets = self._fileids
        elif isinstance(fileids, compat.string_types):
            targets = [fileids]
        else:
            targets = fileids
        return concat([self.open(target).read() for target in targets])
Example 62
Project: razzy-spinner   Author: rafasashi   File: indian.py    GNU General Public License v3.0 5 votes vote down vote up
def raw(self, fileids=None):
        """
        Return the ``concat`` of the contents read from each requested
        file.  ``fileids`` may be None (all files), one id, or several.
        """
        if fileids is None:
            targets = self._fileids
        elif isinstance(fileids, compat.string_types):
            targets = [fileids]
        else:
            targets = fileids
        return concat([self.open(target).read() for target in targets])
Example 63
Project: razzy-spinner   Author: rafasashi   File: rte.py    GNU General Public License v3.0 5 votes vote down vote up
def pairs(self, fileids):
        """
        Build a list of RTEPairs from a RTE corpus.

        :param fileids: a list of RTE corpus fileids; a single fileid
            string is also accepted
        :type: list
        :rtype: list(RTEPair)
        """
        if isinstance(fileids, compat.string_types):
            fileids = [fileids]
        per_file = []
        for fileid in fileids:
            per_file.append(self._read_etree(self.xml(fileid)))
        return concat(per_file)
Example 64
Project: razzy-spinner   Author: rafasashi   File: api.py    GNU General Public License v3.0 5 votes vote down vote up
def abspaths(self, fileids=None, include_encoding=False,
                 include_fileid=False):
        """
        Return a list of the absolute paths for all fileids in this corpus;
        or for the given list of fileids, if specified.

        :type fileids: None or str or list
        :param fileids: Specifies the set of fileids for which paths should
            be returned.  Can be None, for all fileids; a list of
            file identifiers, for a specified set of fileids; or a single
            file identifier, for a single file.  Note that the return
            value is always a list of paths, even if ``fileids`` is a
            single file identifier.

        :param include_encoding: If true, then return a list of
            ``(path_pointer, encoding)`` tuples.

        :param include_fileid: If true, then each returned tuple also
            ends with the file identifier.

        :rtype: list(PathPointer)
        """
        if fileids is None:
            fileids = self._fileids
        elif isinstance(fileids, compat.string_types):
            fileids = [fileids]

        paths = [self._root.join(f) for f in fileids]
        if not (include_encoding or include_fileid):
            return paths

        # Collect the requested columns in (path, encoding, fileid) order
        # and zip them into one tuple per file.
        columns = [paths]
        if include_encoding:
            columns.append([self.encoding(f) for f in fileids])
        if include_fileid:
            columns.append(fileids)
        return list(zip(*columns))
Example 65
Project: razzy-spinner   Author: rafasashi   File: api.py    GNU General Public License v3.0 5 votes vote down vote up
def raw(self, fileids=None):
        """
        Return the ``concat`` of the contents read from each requested
        file.  ``fileids`` may be None (all files), one id, or several.
        """
        if fileids is None:
            targets = self._fileids
        elif isinstance(fileids, compat.string_types):
            targets = [fileids]
        else:
            targets = fileids
        return concat([self.open(target).read() for target in targets])
Example 66
Project: razzy-spinner   Author: rafasashi   File: tagged.py    GNU General Public License v3.0 5 votes vote down vote up
def raw(self, fileids=None):
        """
        :return: the given file(s) as a single string.
        :rtype: str
        """
        # None selects every file of the corpus; a lone id becomes a list.
        if fileids is None:
            targets = self._fileids
        elif isinstance(fileids, compat.string_types):
            targets = [fileids]
        else:
            targets = fileids
        return concat([self.open(target).read() for target in targets])
Example 67
Project: razzy-spinner   Author: rafasashi   File: ieer.py    GNU General Public License v3.0 5 votes vote down vote up
def raw(self, fileids=None):
        """
        Return the ``concat`` of the contents read from each requested
        file.  ``fileids`` may be None (all files), one id, or several.
        """
        if fileids is None:
            targets = self._fileids
        elif isinstance(fileids, compat.string_types):
            targets = [fileids]
        else:
            targets = fileids
        return concat([self.open(target).read() for target in targets])
Example 68
Project: razzy-spinner   Author: rafasashi   File: twitter.py    GNU General Public License v3.0 5 votes vote down vote up
def raw(self, fileids=None):
        """
        Return the corpora in their raw form.
        """
        # A lone file id is wrapped so the loop below sees a list.
        if isinstance(fileids, compat.string_types):
            targets = [fileids]
        elif fileids is None:
            targets = self._fileids
        else:
            targets = fileids
        contents = [self.open(target).read() for target in targets]
        return concat(contents)
Example 69
Project: razzy-spinner   Author: rafasashi   File: cmudict.py    GNU General Public License v3.0 5 votes vote down vote up
def raw(self):
        """
        :return: the cmudict lexicon as a raw string.
        """
        configured = self._fileids
        # A lone file id is wrapped so the loop below sees a list.
        if isinstance(configured, compat.string_types):
            targets = [configured]
        else:
            targets = configured
        return concat([self.open(target).read() for target in targets])
Example 70
Project: razzy-spinner   Author: rafasashi   File: string_category.py    GNU General Public License v3.0 5 votes vote down vote up
def tuples(self, fileids=None):
        """
        Return the ``concat`` of one ``StreamBackedCorpusView`` per
        requested file, each reading blocks with ``self._read_tuple_block``.

        :param fileids: None (all files of the corpus), one file id, or
            several file ids.
        """
        if fileids is None:
            fileids = self._fileids
        elif isinstance(fileids, compat.string_types):
            fileids = [fileids]
        views = []
        for path, enc in self.abspaths(fileids, True):
            views.append(StreamBackedCorpusView(path, self._read_tuple_block,
                                                encoding=enc))
        return concat(views)
Example 71
Project: razzy-spinner   Author: rafasashi   File: string_category.py    GNU General Public License v3.0 5 votes vote down vote up
def raw(self, fileids=None):
        """
        :return: the text contents of the given fileids, as a single string.
        """
        # None selects every file of the corpus; a lone id becomes a list.
        if fileids is None:
            targets = self._fileids
        elif isinstance(fileids, compat.string_types):
            targets = [fileids]
        else:
            targets = fileids
        return concat([self.open(target).read() for target in targets])
Example 72
Project: razzy-spinner   Author: rafasashi   File: pl196x.py    GNU General Public License v3.0 5 votes vote down vote up
def raw(self, fileids=None, categories=None):
		"""
		Return the ``concat`` of the contents read from each file
		selected by ``self._resolve(fileids, categories)``.
		"""
		resolved, _ = self._resolve(fileids, categories)
		if resolved is None:
			targets = self._fileids
		elif isinstance(resolved, compat.string_types):
			targets = [resolved]
		else:
			targets = resolved
		return concat([self.open(target).read() for target in targets])
Example 73
Project: razzy-spinner   Author: rafasashi   File: aligned.py    GNU General Public License v3.0 5 votes vote down vote up
def raw(self, fileids=None):
        """
        :return: the given file(s) as a single string.
        :rtype: str
        """
        # None selects every file of the corpus; a lone id becomes a list.
        if fileids is None:
            targets = self._fileids
        elif isinstance(fileids, compat.string_types):
            targets = [fileids]
        else:
            targets = fileids
        return concat([self.open(target).read() for target in targets])
Example 74
Project: razzy-spinner   Author: rafasashi   File: internals.py    GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, etree):
        r"""
        Initialize a new Element wrapper for ``etree``.

        If ``etree`` is a string, then it will be converted to an
        Element object using ``ElementTree.fromstring()`` first:

            >>> ElementWrapper("<test></test>")
            <Element "<?xml version='1.0' encoding='utf8'?>\n<test />">

        """
        element = etree
        if isinstance(element, compat.string_types):
            element = ElementTree.fromstring(element)
        # Stored directly in the instance dict rather than by plain
        # attribute assignment.
        self.__dict__['_etree'] = element
Example 75
Project: razzy-spinner   Author: rafasashi   File: linearlogic.py    GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, name, dependencies=None):
        """
        :param name: str for the constant name
        :param dependencies: list of int for the indices on which this atom is dependent
        """
        assert isinstance(name, string_types)
        self.name = name
        # Any falsy value (None, empty list, ...) becomes a fresh empty list.
        self.dependencies = dependencies if dependencies else []
Example 76
Project: razzy-spinner   Author: rafasashi   File: evaluate.py    GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, xs):
        """
        :param xs: a list of (symbol, value) pairs.
        :raise ValueError: if a value is neither a string, a bool nor a set.
        """
        super(Valuation, self).__init__()
        for sym, val in xs:
            # Strings and booleans are stored unchanged.
            if isinstance(val, (string_types, bool)):
                self[sym] = val
                continue
            # Sets are converted with ``set2rel`` before being stored.
            if isinstance(val, set):
                self[sym] = set2rel(val)
                continue
            msg = textwrap.fill("Error in initializing Valuation. "
                                "Unrecognized value for symbol '%s':\n%s" % (sym, val), width=66)
            raise ValueError(msg)
Example 77
Project: razzy-spinner   Author: rafasashi   File: evaluate.py    GNU General Public License v3.0 5 votes vote down vote up
def domain(self):
        """Set-theoretic domain of the value-space of a Valuation."""
        members = []
        for val in self.values():
            if isinstance(val, string_types):
                members.append(val)
                continue
            if isinstance(val, bool):
                # Booleans contribute nothing to the domain.
                continue
            # Any other value is iterated as a collection of tuples,
            # keeping every element that is not None.
            for tuple_ in val:
                for elem in tuple_:
                    if elem is not None:
                        members.append(elem)
        return set(members)
Example 78
Project: OpenBottle   Author: xiaozhuchacha   File: tree.py    MIT License 5 votes vote down vote up
def __init__(self, node, children=None):
        """
        Initialize the tree with label ``node`` and the given children.

        :param node: Stored as the label of this tree.
        :param children: The child list; required despite the ``None``
            default, and must not be a string.
        :raise TypeError: If ``children`` is None or a string.
        """
        if children is None:
            raise TypeError("%s: Expected a node value and child list "
                            % type(self).__name__)
        if isinstance(children, string_types):
            raise TypeError("%s() argument 2 should be a list, not a "
                            "string" % type(self).__name__)
        list.__init__(self, children)
        self._label = node

    #////////////////////////////////////////////////////////////
    # Comparison operators
    #//////////////////////////////////////////////////////////// 
Example 79
Project: OpenBottle   Author: xiaozhuchacha   File: tree.py    MIT License 5 votes vote down vote up
def pformat(self, margin=70, indent=0, nodesep='', parens='()', quotes=False):
        """
        :return: A pretty-printed string representation of this tree.
        :rtype: str
        :param margin: The right margin at which to do line-wrapping.
        :type margin: int
        :param indent: The indentation level at which printing
            begins.  Children are placed on their own lines, indented
            two columns deeper than this level.
        :type indent: int
        :param nodesep: A string that is used to separate the node
            from the children.  E.g., ``':'`` gives trees like
            ``(S: (NP: I) (VP: (V: saw) (NP: it)))``; the default is
            the empty string.
        :param parens: A two-item sequence; ``parens[0]`` opens and
            ``parens[1]`` closes each tree.
        :param quotes: If false, string leaves are written verbatim;
            otherwise they are rendered with ``unicode_repr``.
        """
        # The single-line rendering wins whenever it fits in the margin.
        flat = self._pformat_flat(nodesep, parens, quotes)
        if len(flat) + indent < margin:
            return flat

        # Otherwise: label first, then every child on a line of its own.
        label = self._label
        if not isinstance(label, string_types):
            label = unicode_repr(label)
        child_prefix = '\n' + ' ' * (indent + 2)
        pieces = ['%s%s%s' % (parens[0], label, nodesep)]
        for child in self:
            if isinstance(child, Tree):
                rendered = child.pformat(margin, indent + 2,
                                         nodesep, parens, quotes)
            elif isinstance(child, tuple):
                rendered = "/".join(child)
            elif isinstance(child, string_types) and not quotes:
                rendered = '%s' % child
            else:
                rendered = unicode_repr(child)
            pieces.append(child_prefix + rendered)
        return ''.join(pieces) + parens[1]
Example 80
Project: OpenBottle   Author: xiaozhuchacha   File: punkt.py    MIT License 5 votes vote down vote up
def train(self, train_text, verbose=False):
        """
        Derives parameters from a given training text, or uses the parameters
        given. Repeated calls to this method destroy previous parameters. For
        incremental training, instantiate a separate PunktTrainer instance.

        A non-string ``train_text`` is returned unchanged; a string is fed
        to a new ``PunktTrainer`` and its parameters are returned.
        ``verbose`` is accepted but not used here.
        """
        if isinstance(train_text, string_types):
            trainer = PunktTrainer(train_text, lang_vars=self._lang_vars,
                                   token_cls=self._Token)
            return trainer.get_params()
        # Anything that is not a string is handed back as-is.
        return train_text

    #////////////////////////////////////////////////////////////
    #{ Tokenization
    #////////////////////////////////////////////////////////////