Python string.whitespace Examples

The following are 30 code examples showing how to use string.whitespace (a constant of the string module, not a callable). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module string, or try the search function.

Example 1
Project: hyper-h2   Author: python-hyper   File: utilities.py    License: MIT License 6 votes vote down vote up
def _reject_surrounding_whitespace(headers, hdr_validation_flags):
    """
    Raises a ProtocolError if any header name or value is surrounded by
    whitespace characters.
    """
    # For compatibility with RFC 7230 header fields, we need to allow the field
    # value to be an empty string. This is ludicrous, but technically allowed.
    # The field name may not be empty, though, so we can safely assume that it
    # must have at least one character in it and throw exceptions if it
    # doesn't.
    for header in headers:
        if header[0][0] in _WHITESPACE or header[0][-1] in _WHITESPACE:
            raise ProtocolError(
                "Received header name surrounded by whitespace %r" % header[0])
        if header[1] and ((header[1][0] in _WHITESPACE) or
           (header[1][-1] in _WHITESPACE)):
            raise ProtocolError(
                "Received header value surrounded by whitespace %r" % header[1]
            )
        yield header 
Example 2
Project: ironpython2   Author: IronLanguages   File: IDLEenvironment.py    License: Apache License 2.0 6 votes vote down vote up
def _NextTok(str, pos):
	# Returns (token, endPos)
	end = len(str)
	if pos>=end: return None, 0
	while pos < end and str[pos] in string.whitespace:
		pos = pos + 1
	# Special case for +-
	if str[pos] in '+-':
		return str[pos],pos+1
	# Digits also a special case.
	endPos = pos
	while endPos < end and str[endPos] in string.digits+".":
		endPos = endPos + 1
	if pos!=endPos: return str[pos:endPos], endPos
	endPos = pos
	while endPos < end and str[endPos] not in string.whitespace + string.digits + "+-":
		endPos = endPos + 1
	if pos!=endPos: return str[pos:endPos], endPos
	return None, 0 
Example 3
Project: kobo-predict   Author: awemulya   File: pyparsing.py    License: BSD 2-Clause "Simplified" License 6 votes vote down vote up
def originalTextFor(expr, asString=True):
    """Helper that wraps C{expr} so the original, untokenized input text it matched is
       returned.  Useful to restore the parsed fields of an HTML start tag into the raw
       tag text itself, or to revert separate tokens with intervening whitespace back to
       the original matching input text.  By default, returns a string containing the
       original parsed text.

       If the optional C{asString} argument is passed as C{False}, then the return value
       is a C{L{ParseResults}} containing any results names that were originally matched,
       and a single token containing the original matched text from the input string.
       So if the expression passed to C{L{originalTextFor}} contains expressions with
       defined results names, you must set C{asString} to C{False} if you want to
       preserve those results name values."""
    # Two empty markers record the parse location before and after expr.
    startMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endMarker = startMarker.copy()
    endMarker.callPreparse = False
    wrapped = startMarker("_original_start") + expr + endMarker("_original_end")

    def sliceOriginal(s,l,t):
        return s[t._original_start:t._original_end]

    def replaceWithOriginal(s,l,t):
        # Pop the start marker first, then the end marker, then swap the
        # token list for the single original-text token.
        begin = t.pop('_original_start')
        finish = t.pop('_original_end')
        t[:] = [s[begin:finish]]

    wrapped.setParseAction(sliceOriginal if asString else replaceWithOriginal)
    return wrapped
Example 4
Project: hyper-h2   Author: python-hyper   File: utilities.py    License: MIT License 5 votes vote down vote up
def _strip_surrounding_whitespace(headers, hdr_validation_flags):
    """
    Given an iterable of header two-tuples, strip both leading and trailing
    whitespace from both header names and header values. This generator
    produces tuples that preserve the original type of the header tuple for
    tuple and any ``HeaderTuple``.
    """
    for header in headers:
        if isinstance(header, HeaderTuple):
            yield header.__class__(header[0].strip(), header[1].strip())
        else:
            yield (header[0].strip(), header[1].strip()) 
Example 5
Project: timefhuman   Author: alvinwan   File: tokenize.py    License: Apache License 2.0 5 votes vote down vote up
def generic_tokenize(characters):
    """Default tokenizer

    Whitespace separates tokens and is dropped, a comma is always emitted as
    a token of its own, and a change of character type (as reported by
    ``get_character_type``) starts a new token unless either side of the
    change is punctuation or has no type. Empty tokens are never yielded, so
    empty input or input ending in whitespace or a comma produces no trailing
    ``''``.

    >>> list(generic_tokenize('7/17/18 3:00 p.m.'))
    ['7/17/18', '3:00', 'p.m.']
    >>> list(generic_tokenize('July 17, 2018 at 3p.m.'))
    ['July', '17', ',', '2018', 'at', '3', 'p.m.']
    >>> list(generic_tokenize('July 17, 2018 3 p.m.'))
    ['July', '17', ',', '2018', '3', 'p.m.']
    >>> list(generic_tokenize('3PM on July 17'))
    ['3', 'PM', 'on', 'July', '17']
    >>> list(generic_tokenize('tomorrow noon,Wed 3 p.m.,Fri 11 AM'))
    ['tomorrow', 'noon', ',', 'Wed', '3', 'p.m.', ',', 'Fri', '11', 'AM']
    >>> list(generic_tokenize(''))
    []
    """
    token = ''
    last_type = None
    for character in characters:
        char_type = get_character_type(character)
        is_different_type = (
            None not in (char_type, last_type)
            and char_type != last_type
            and 'punctuation' not in (char_type, last_type)
        )
        is_skip_character = character in string.whitespace
        is_break_character = character in ','

        if is_skip_character or is_different_type or is_break_character:
            # Flush whatever has been accumulated so far.
            if token:
                yield token
            # A skipped character starts nothing; any other one opens the
            # next token.
            token = '' if is_skip_character else character
            if is_break_character:
                yield token
                token = ''
        else:
            token += character
        last_type = char_type

    # Only flush a real token; the buffer is empty after a trailing skip or
    # break character and for empty input.
    if token:
        yield token
Example 6
Project: VASPy   Author: PytLab   File: functions.py    License: MIT License 5 votes vote down vote up
def str2list(rawstr):
    """Split ``rawstr`` into a list of its whitespace-separated fields.

    Leading and trailing whitespace is ignored and runs of whitespace count
    as a single separator, so the result never contains empty strings. Tabs
    and other whitespace between fields separate them just like spaces do.
    An empty or all-whitespace string gives ``[]``.
    """
    # str.split() with no argument already strips both ends and collapses
    # consecutive whitespace, so no post-filtering is required.
    return rawstr.split()
Example 7
Project: VASPy   Author: PytLab   File: iter.py    License: MIT License 5 votes vote down vote up
def ifreq(self):
        """
        Return an iterator over frequency information dictionaries.

        The file ``self.filename`` is scanned line by line. A line matching
        ``self.title_regex`` opens a block, the following lines are parsed
        with ``line2list`` into coordinate and displacement triples, and a
        blank line closes the block. For every closed block one dict is
        yielded, built by zipping ``self.freq_info`` with the groups of the
        most recent ``self.freq_regex`` match followed by the coordinate list
        and the displacement list.
        """
        with open(self.filename, "r") as f:
            in_block = False

            for line in f:
                freq_match = self.freq_regex.match(line)
                title_match = self.title_regex.match(line)
                is_blank = not line.strip(whitespace)

                # Remember the latest frequency header seen.
                if freq_match:
                    freq_data = list(freq_match.groups())

                # A title line outside a block starts collecting.
                if title_match and not in_block:
                    in_block = True
                    coords = []
                    deltas = []
                    continue

                if not in_block:
                    continue

                # A blank line inside a block ends it and emits the record.
                if is_blank:
                    in_block = False
                    freq_data.extend([coords, deltas])
                    yield dict(zip(self.freq_info, freq_data))
                    continue

                # Any other line inside a block is a data row.
                x, y, z, dx, dy, dz = line2list(line)
                coords.append((x, y, z))
                deltas.append((dx, dy, dz))
Example 8
Project: VASPy   Author: PytLab   File: electro.py    License: MIT License 5 votes vote down vote up
def load(self):
        """Rewrite load method.

        Runs ``PosCar.load`` first, then re-opens ``self.filename``, skips
        its first ``self.totline`` lines, reads the grid dimensions from the
        first non-blank line after them and reads every remaining line as
        electron localization function data. The result is stored in
        ``self.grid`` (an ``(x, y, z)`` tuple) and ``self.elf_data`` (an
        array reshaped to ``(x, y, z)`` in Fortran order).

        Raises:
            ValueError: if the file ends before a grid dimension line is
                found.
        """
        PosCar.load(self)
        with open(self.filename, 'r') as f:
            for _ in range(self.totline):
                f.readline()
            #get dimension of 3d array
            while True:
                raw = f.readline()
                # readline() returns '' only at end of file; without this
                # check a truncated file would make the loop spin forever.
                if not raw:
                    raise ValueError(
                        "no grid dimension line found in %s" % self.filename)
                grid = raw.strip(whitespace)
                if grid:
                    break
            x, y, z = line2list(grid, dtype=int)
            #read electron localization function data
            elf_data = []
            for line in f:
                elf_data.extend(line2list(line))
        #########################################
        #                                       #
        #           !!! Notice !!!              #
        # NGX is the length of the **0th** axis #
        # NGY is the length of the **1st** axis #
        # NGZ is the length of the **2nd** axis #
        #                                       #
        #########################################
        #reshape to 3d array
        elf_data = np.array(elf_data).reshape((x, y, z), order='F')
        #set attrs
        self.grid = x, y, z
        self.elf_data = elf_data

        return
Example 9
Project: vulscan   Author: vulscanteam   File: pyparsing.py    License: MIT License 5 votes vote down vote up
def setDefaultWhitespaceChars( chars ):
        """Overrides the default whitespace chars by storing C{chars} in
           C{ParserElement.DEFAULT_WHITE_CHARS}.
        """
        # Assigned on the ParserElement class itself, not on an instance.
        ParserElement.DEFAULT_WHITE_CHARS = chars
Example 10
Project: vulscan   Author: vulscanteam   File: pyparsing.py    License: MIT License 5 votes vote down vote up
def leaveWhitespace( self ):
        """Disables the skipping of whitespace before matching the characters in the
           C{ParserElement}'s defined pattern.  This is normally only used internally by
           the pyparsing module, but may be needed in some whitespace-sensitive grammars.

           Sets C{skipWhitespace} to C{False} on this element and returns C{self}, so
           the call can be chained.
        """
        self.skipWhitespace = False
        return self
Example 11
Project: vulscan   Author: vulscanteam   File: pyparsing.py    License: MIT License 5 votes vote down vote up
def setWhitespaceChars( self, chars ):
        """Use C{chars} as the whitespace characters for this element instead of the
           defaults.  Whitespace skipping is switched on, copying of the default
           whitespace characters is switched off, and C{self} is returned so the call
           can be chained.
        """
        self.whiteChars = chars
        self.copyDefaultWhiteChars = False
        self.skipWhitespace = True
        return self
Example 12
Project: vulscan   Author: vulscanteam   File: pyparsing.py    License: MIT License 5 votes vote down vote up
def parseImpl( self, instring, loc, doActions=True ):
        """Match only at the start of the input: succeed with no tokens when C{loc} is 0
           or when C{loc} is exactly where C{preParse} lands starting from position 0;
           otherwise raise a C{ParseException}.
        """
        # A non-zero loc is still acceptable if everything before it is just
        # whitespace and ignorables, i.e. preParse from 0 ends up at loc.
        if loc != 0 and loc != self.preParse( instring, 0 ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
Example 13
Project: vulscan   Author: vulscanteam   File: pyparsing.py    License: MIT License 5 votes vote down vote up
def __init__( self, expr, joinString="", adjacent=True ):
        """Initialise the Combine around C{expr}, remembering C{joinString} and
           C{adjacent}.
        """
        super(Combine,self).__init__( expr )
        # suppress whitespace-stripping in contained parse expressions ...
        if adjacent:
            self.leaveWhitespace()
        self.joinString = joinString
        self.adjacent = adjacent
        self.callPreparse = True
        # ... but re-enable it on the Combine itself; this must come after
        # the leaveWhitespace() call above.
        self.skipWhitespace = True
Example 14
Project: vulscan   Author: vulscanteam   File: pyparsing.py    License: MIT License 5 votes vote down vote up
def originalTextFor(expr, asString=True):
    """Helper that wraps C{expr} so the original, untokenized input text it matched is
       returned.  Useful to restore the parsed fields of an HTML start tag into the raw
       tag text itself, or to revert separate tokens with intervening whitespace back to
       the original matching input text.  Simpler to use than the parse action
       C{L{keepOriginalText}}, and does not require the inspect module to chase up the
       call stack.  By default, returns a string containing the original parsed text.

       If the optional C{asString} argument is passed as C{False}, then the return value
       is a C{L{ParseResults}} containing any results names that were originally matched,
       and a single token containing the original matched text from the input string.
       So if the expression passed to C{L{originalTextFor}} contains expressions with
       defined results names, you must set C{asString} to C{False} if you want to
       preserve those results name values."""
    # Two empty markers record the parse location before and after expr.
    startMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endMarker = startMarker.copy()
    endMarker.callPreparse = False
    wrapped = startMarker("_original_start") + expr + endMarker("_original_end")

    def sliceOriginal(s,l,t):
        return s[t._original_start:t._original_end]

    def replaceWithOriginal(s,l,t):
        # Statement order is kept exactly: clear the tokens, insert the
        # original text, then drop the two marker names.
        del t[:]
        t.insert(0, s[t._original_start:t._original_end])
        del t["_original_start"]
        del t["_original_end"]

    wrapped.setParseAction(sliceOriginal if asString else replaceWithOriginal)
    return wrapped
Example 15
Project: cgpm   Author: probcomp   File: vsinline.py    License: Apache License 2.0 5 votes vote down vote up
def _validate_expression_abstract(self, expression, inputs):
        # We are expecting an expression of the form (lambda (<args>) (exp))
        # so remove the whitespace, split by left parens, check the first
        # token is spaces, the second token is "lambda", the third token is the
        # the arguments. Note this will fail if (exp) is just exp i.e. 1.
        expression = expression.replace('\n', ' ')
        tokens = expression.split('(')
        # assert all(t in ['', ' '] for t in tokens[0])
        assert tokens[1].strip() == 'lambda'
        arguments = tokens[2]
        assert len([i for i in arguments if i ==')']) == 1
        arguments = arguments.replace(')', '')
        arguments = arguments.split()
        assert len(arguments) == len(inputs) 
Example 16
Project: cgpm   Author: probcomp   File: vsinline.py    License: Apache License 2.0 5 votes vote down vote up
def _validate_expression_concrete(self, expression, inputs):
        # We are expecting an expression of the form (a,b,c) ~> {}
        # so remove the whitespace, split by left parens, check the first
        # token is spaces, the second token is "lambda", the third token is the
        # the arguments.
        # Eliminate surrounding whitespace.
        expression = expression.encode('ascii','ignore')
        expression = expression.translate(None, string.whitespace)
        # Retrieve symbols before ~>.
        tokens = expression.split('~>')
        # Eliminate the parens.
        arguments = tokens[0].replace('(','').replace(')','')
        arguments = [a for a in arguments.split(',') if a != '']
        assert len(arguments) == len(inputs) 
Example 17
Project: jbox   Author: jpush   File: pyparsing.py    License: MIT License 5 votes vote down vote up
def setDefaultWhitespaceChars( chars ):
        """Overrides the default whitespace chars: C{chars} replaces the value of
           C{ParserElement.DEFAULT_WHITE_CHARS}.
        """
        # The attribute is set on the ParserElement class, not on an instance.
        ParserElement.DEFAULT_WHITE_CHARS = chars
Example 18
Project: jbox   Author: jpush   File: pyparsing.py    License: MIT License 5 votes vote down vote up
def leaveWhitespace( self ):
        """Disables the skipping of whitespace before matching the characters in the
           C{ParserElement}'s defined pattern.  This is normally only used internally by
           the pyparsing module, but may be needed in some whitespace-sensitive grammars.

           Implemented by setting C{skipWhitespace} to C{False}; returns C{self} to
           allow chaining.
        """
        self.skipWhitespace = False
        return self
Example 19
Project: jbox   Author: jpush   File: pyparsing.py    License: MIT License 5 votes vote down vote up
def setWhitespaceChars( self, chars ):
        """Replace this element's whitespace characters with C{chars}.  Turns
           whitespace skipping on, turns copying of the default whitespace characters
           off, and returns C{self} for chaining.
        """
        self.copyDefaultWhiteChars = False
        self.whiteChars = chars
        self.skipWhitespace = True
        return self
Example 20
Project: jbox   Author: jpush   File: pyparsing.py    License: MIT License 5 votes vote down vote up
def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with an empty token list only at the start of the input: either
           C{loc} is 0, or C{preParse} from position 0 ends exactly at C{loc}.  Any
           other location raises a C{ParseException}.
        """
        at_start = (loc == 0)
        # For a non-zero loc, check that the entire string up to here is just
        # whitespace and ignorables.
        if not at_start and loc != self.preParse( instring, 0 ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
Example 21
Project: jbox   Author: jpush   File: pyparsing.py    License: MIT License 5 votes vote down vote up
def __init__( self, expr, joinString="", adjacent=True ):
        """Set up the Combine for C{expr}; C{joinString} and C{adjacent} are stored
           on the instance.
        """
        super(Combine,self).__init__( expr )
        if adjacent:
            # suppress whitespace-stripping in contained parse expressions
            self.leaveWhitespace()
        self.callPreparse = True
        self.joinString = joinString
        self.adjacent = adjacent
        # re-enable whitespace skipping on the Combine itself (kept after the
        # leaveWhitespace() call, which turns it off)
        self.skipWhitespace = True
Example 22
Project: jbox   Author: jpush   File: pyparsing.py    License: MIT License 5 votes vote down vote up
def originalTextFor(expr, asString=True):
    """Helper returning an expression that matches C{expr} but produces the original,
       untokenized text that was matched.  Useful to restore the parsed fields of an
       HTML start tag into the raw tag text itself, or to revert separate tokens with
       intervening whitespace back to the original matching input text.  Simpler to use
       than the parse action C{L{keepOriginalText}}, and does not require the inspect
       module to chase up the call stack.  By default, returns a string containing the
       original parsed text.

       If the optional C{asString} argument is passed as C{False}, then the return value
       is a C{L{ParseResults}} containing any results names that were originally matched,
       and a single token containing the original matched text from the input string.
       So if the expression passed to C{L{originalTextFor}} contains expressions with
       defined results names, you must set C{asString} to C{False} if you want to
       preserve those results name values."""
    beginLoc = Empty().setParseAction(lambda s,loc,t: loc)
    finishLoc = beginLoc.copy()
    finishLoc.callPreparse = False
    bracketed = beginLoc("_original_start") + expr + finishLoc("_original_end")

    def asText(s,l,t):
        return s[t._original_start:t._original_end]

    def asResults(s,l,t):
        # Same statement order as before: empty the token list, insert the
        # original text, then remove the two marker names.
        del t[:]
        t.insert(0, s[t._original_start:t._original_end])
        del t["_original_start"]
        del t["_original_end"]

    if asString:
        bracketed.setParseAction(asText)
    else:
        bracketed.setParseAction(asResults)
    return bracketed
Example 23
Project: jbox   Author: jpush   File: pyparsing.py    License: MIT License 5 votes vote down vote up
def setDefaultWhitespaceChars( chars ):
        """Overrides the default whitespace chars.

           C{chars} is stored as C{ParserElement.DEFAULT_WHITE_CHARS}.
        """
        # Class-level assignment on ParserElement.
        ParserElement.DEFAULT_WHITE_CHARS = chars
Example 24
Project: jbox   Author: jpush   File: pyparsing.py    License: MIT License 5 votes vote down vote up
def leaveWhitespace( self ):
        """Disables the skipping of whitespace before matching the characters in the
           C{ParserElement}'s defined pattern.  This is normally only used internally by
           the pyparsing module, but may be needed in some whitespace-sensitive grammars.

           The C{skipWhitespace} flag is set to C{False} and C{self} is returned.
        """
        self.skipWhitespace = False
        return self
Example 25
Project: jbox   Author: jpush   File: pyparsing.py    License: MIT License 5 votes vote down vote up
def setWhitespaceChars( self, chars ):
        """Override the whitespace characters of this element with C{chars}.  The
           element skips whitespace afterwards and no longer copies the default
           whitespace characters.  Returns C{self}.
        """
        self.skipWhitespace = True
        self.copyDefaultWhiteChars = False
        self.whiteChars = chars
        return self
Example 26
Project: jbox   Author: jpush   File: pyparsing.py    License: MIT License 5 votes vote down vote up
def __init__( self, expr, joinString="", adjacent=True ):
        """Create a Combine over C{expr}, keeping C{joinString} and C{adjacent} as
           attributes.
        """
        super(Combine,self).__init__( expr )
        # With adjacent set, whitespace-stripping is suppressed in the
        # contained parse expressions.
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        self.joinString = joinString
        self.callPreparse = True
        # Whitespace skipping is re-enabled on the Combine itself; this
        # assignment has to stay after leaveWhitespace().
        self.skipWhitespace = True
Example 27
Project: jbox   Author: jpush   File: pyparsing.py    License: MIT License 5 votes vote down vote up
def delimitedList( expr, delim=",", combine=False ):
    """Helper to build an expression matching a list of C{expr} separated by C{delim}
       (a comma by default).
       By default the list elements and delimiters may have intervening whitespace and
       comments, and the result is a list of tokens with the delimiters suppressed.
       Passing C{combine=True} changes both: the matching tokens are returned as a
       single token string with the delimiters included.
    """
    exprName = _ustr(expr)
    dlName = exprName+" ["+_ustr(delim)+" "+exprName+"]..."
    if not combine:
        return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
    return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
Example 28
Project: jbox   Author: jpush   File: pyparsing.py    License: MIT License 5 votes vote down vote up
def originalTextFor(expr, asString=True):
    """Helper to get back the original, untokenized text matched by a given expression.
       Useful to restore the parsed fields of an HTML start tag into the raw tag text
       itself, or to revert separate tokens with intervening whitespace back to the
       original matching input text.  By default, returns a string containing the
       original parsed text.

       If the optional C{asString} argument is passed as C{False}, then the return value
       is a C{L{ParseResults}} containing any results names that were originally matched,
       and a single token containing the original matched text from the input string.
       So if the expression passed to C{L{originalTextFor}} contains expressions with
       defined results names, you must set C{asString} to C{False} if you want to
       preserve those results name values."""
    # Location markers placed immediately before and after expr.
    headMarker = Empty().setParseAction(lambda s,loc,t: loc)
    tailMarker = headMarker.copy()
    tailMarker.callPreparse = False
    spanExpr = headMarker("_original_start") + expr + tailMarker("_original_end")

    def textOnly(s,l,t):
        return s[t._original_start:t._original_end]

    def textInResults(s,l,t):
        # Order preserved from the original action: clear tokens, insert the
        # matched text, then delete the two marker names.
        del t[:]
        t.insert(0, s[t._original_start:t._original_end])
        del t["_original_start"]
        del t["_original_end"]

    action = textOnly if asString else textInResults
    spanExpr.setParseAction(action)
    return spanExpr
Example 29
Project: FARM   Author: deepset-ai   File: utils.py    License: Apache License 2.0 5 votes vote down vote up
def generate_tok_to_ch_map(text):
    """Generate a mapping from token index to character index for the tokens
    obtained by splitting ``text`` with ``.split()``.

    Entry ``i`` of the returned list is the index in ``text`` of the first
    character of the ``i``-th token, so the list always has the same length
    as ``text.split()``. Leading whitespace is skipped, and an empty or
    all-whitespace ``text`` gives ``[]``.

    e.g. ``generate_tok_to_ch_map("ab  cd e")`` returns ``[0, 4, 7]``.
    """
    offsets = []
    # Behave as if the text were preceded by whitespace, so the first token
    # is recorded wherever it actually starts rather than assumed at 0.
    follows_whitespace = True
    for i, ch in enumerate(text):
        # str.isspace() covers Unicode whitespace as well, which is what
        # str.split() without arguments splits on.
        if ch.isspace():
            follows_whitespace = True
        elif follows_whitespace:
            offsets.append(i)
            follows_whitespace = False
    return offsets
Example 30
Project: FARM   Author: deepset-ai   File: utils.py    License: Apache License 2.0 5 votes vote down vote up
def split_with_metadata(text):
    """Split a string text by whitespace and also return indexes, a mapping
    from token index to character index."""
    indexes = generate_tok_to_ch_map(text)
    tokens = text.split()
    # Both views of the text must describe the same number of tokens.
    assert len(tokens) == len(indexes)
    return tokens, indexes