Python tokenize.NUMBER Examples

The following are 29 code examples of tokenize.NUMBER(), drawn from open-source projects. The source file and originating project are noted above each example. You may also want to check out the other functions and classes available in the tokenize module.
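For orientation, tokenize.NUMBER is the token type the standard-library tokenize module assigns to numeric literals. A minimal sketch (standard library only) of picking NUMBER tokens out of a source string:

import io
import tokenize

source = "total = 3.14 * radius ** 2"
for tok in tokenize.generate_tokens(io.StringIO(source).readline):
    if tok.type == tokenize.NUMBER:
        # tok.string is the literal text, e.g. '3.14' or '2'
        print(tok.string, tok.start)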
Example #1
Source File: test_inputtransformer.py    From Computable with MIT License
def decistmt(tokens):
    """Substitute Decimals for floats in a string of statements.

    Based on an example from the tokenize module docs.
    """
    for toknum, tokval, _, _, _  in tokens:
        if toknum == tokenize.NUMBER and '.' in tokval:  # replace NUMBER tokens
            for newtok in [
                (tokenize.NAME, 'Decimal'),
                (tokenize.OP, '('),
                (tokenize.STRING, repr(tokval)),
                (tokenize.OP, ')')
            ]:
                yield newtok
        else:
            yield (toknum, tokval) 
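A minimal usage sketch for the generator above; decistmt yields 2-tuples, which tokenize.untokenize accepts in its compatibility mode (the rewrite wrapper is hypothetical, not part of the original file):

import io
import tokenize

def rewrite(source):
    tokens = tokenize.generate_tokens(io.StringIO(source).readline)
    return tokenize.untokenize(decistmt(tokens))

# Floats become Decimal literals; untokenize may normalize spacing, e.g.
# rewrite("x = 1.1 + 2.2") -> "x =Decimal ('1.1')+Decimal ('2.2')"
print(rewrite("x = 1.1 + 2.2"))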
Example #2
Source File: format.py    From elasticintel with GNU General Public License v3.0
def _filter_header(s):
    """Clean up 'L' in npz header ints.

    Cleans up the 'L' in strings representing integers. Needed to allow npz
    headers produced in Python2 to be read in Python3.

    Parameters
    ----------
    s : byte string
        Npy file header.

    Returns
    -------
    header : str
        Cleaned up header.

    """
    import tokenize
    if sys.version_info[0] >= 3:
        from io import StringIO
    else:
        from StringIO import StringIO

    tokens = []
    last_token_was_number = False
    # Passing .read (rather than .readline) hands tokenize the whole header
    # as one "line"; that works here because npy headers are single-line.
    for token in tokenize.generate_tokens(StringIO(asstr(s)).read):
        token_type = token[0]
        token_string = token[1]
        if (last_token_was_number and
                token_type == tokenize.NAME and
                token_string == "L"):
            continue
        else:
            tokens.append(token)
        last_token_was_number = (token_type == tokenize.NUMBER)
    return tokenize.untokenize(tokens) 
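A self-contained sketch of the same idea for quick experimentation, Python 3 only, with a plain .decode() standing in for numpy's internal asstr helper (this driver is an assumption, not part of the numpy source above):

import io
import tokenize

def strip_long_suffix(header_bytes):
    # Trailing newline keeps tokenize happy across versions.
    source = header_bytes.decode('latin1') + "\n"
    tokens = []
    last_was_number = False
    for tok in tokenize.generate_tokens(io.StringIO(source).readline):
        if last_was_number and tok.type == tokenize.NAME and tok.string == "L":
            continue  # drop the Python 2 long suffix
        tokens.append(tok)
        last_was_number = tok.type == tokenize.NUMBER
    return tokenize.untokenize(tokens)[:-1]

print(strip_long_suffix(b"{'descr': '<i8', 'shape': (10L, 3L)}"))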
Example #3
Source File: format.py    From pySINDy with MIT License
def _filter_header(s):
    """Clean up 'L' in npz header ints.

    Cleans up the 'L' in strings representing integers. Needed to allow npz
    headers produced in Python2 to be read in Python3.

    Parameters
    ----------
    s : byte string
        Npy file header.

    Returns
    -------
    header : str
        Cleaned up header.

    """
    import tokenize
    if sys.version_info[0] >= 3:
        from io import StringIO
    else:
        from StringIO import StringIO

    tokens = []
    last_token_was_number = False
    # adding newline as python 2.7.5 workaround
    string = asstr(s) + "\n"
    for token in tokenize.generate_tokens(StringIO(string).readline):
        token_type = token[0]
        token_string = token[1]
        if (last_token_was_number and
                token_type == tokenize.NAME and
                token_string == "L"):
            continue
        else:
            tokens.append(token)
        last_token_was_number = (token_type == tokenize.NUMBER)
    # removing newline (see above) as python 2.7.5 workaround
    return tokenize.untokenize(tokens)[:-1] 
Example #4
Source File: gftools-rangify.py    From gftools with Apache License 2.0
def get_codepoints(cps):
    results = []
    for cp in cps:
        if not cp.type == tokenize.NUMBER:
            continue
        results.append(int(cp.string, 16))
    return results 
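The tokens passed in are expected to come from tokenize.generate_tokens, with the NUMBER values being hex literals (int(cp.string, 16) accepts the 0x prefix). A hedged usage sketch over nam-file-style input (the sample text is made up):

import io
import tokenize

nam_text = "0x0041  LATIN CAPITAL LETTER A\n0x00C0  LATIN CAPITAL LETTER A WITH GRAVE\n"
tokens = tokenize.generate_tokens(io.StringIO(nam_text).readline)
print(get_codepoints(tokens))  # [65, 192]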
Example #5
Source File: format.py    From mxnet-lambda with Apache License 2.0
def _filter_header(s):
    """Clean up 'L' in npz header ints.

    Cleans up the 'L' in strings representing integers. Needed to allow npz
    headers produced in Python2 to be read in Python3.

    Parameters
    ----------
    s : byte string
        Npy file header.

    Returns
    -------
    header : str
        Cleaned up header.

    """
    import tokenize
    if sys.version_info[0] >= 3:
        from io import StringIO
    else:
        from StringIO import StringIO

    tokens = []
    last_token_was_number = False
    # Passing .read (rather than .readline) hands tokenize the whole header
    # as one "line"; that works here because npy headers are single-line.
    for token in tokenize.generate_tokens(StringIO(asstr(s)).read):
        token_type = token[0]
        token_string = token[1]
        if (last_token_was_number and
                token_type == tokenize.NAME and
                token_string == "L"):
            continue
        else:
            tokens.append(token)
        last_token_was_number = (token_type == tokenize.NUMBER)
    return tokenize.untokenize(tokens) 
Example #6
Source File: config_parser.py    From gin-config with Apache License 2.0
def _maybe_parse_basic_type(self):
    """Try to parse a basic type (str, bool, number)."""
    token_value = ''
    # Allow a leading dash to handle negative numbers.
    if self._current_token.value == '-':
      token_value += self._current_token.value
      self._advance()

    basic_type_tokens = [tokenize.NAME, tokenize.NUMBER, tokenize.STRING]
    continue_parsing = self._current_token.kind in basic_type_tokens
    if not continue_parsing:
      return False, None

    while continue_parsing:
      token_value += self._current_token.value

      try:
        value = ast.literal_eval(token_value)
      except Exception as e:  # pylint: disable=broad-except
        err_str = "{}\n    Failed to parse token '{}'"
        self._raise_syntax_error(err_str.format(e, token_value))

      was_string = self._current_token.kind == tokenize.STRING
      self._advance()
      is_string = self._current_token.kind == tokenize.STRING
      continue_parsing = was_string and is_string

    return True, value 
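The was_string and is_string loop exists because adjacent string literals in a config are implicitly concatenated, just as in Python source; a standalone illustration of the ast.literal_eval behavior the loop relies on:

import ast

# Accumulated text from two adjacent STRING tokens still parses as one value:
print(ast.literal_eval("'foo' 'bar'"))  # 'foobar'

# The leading '-' consumed before the loop makes negative literals work too:
print(ast.literal_eval("-12.5"))        # -12.5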
Example #7
Source File: format.py    From ImageFusion with MIT License
def _filter_header(s):
    """Clean up 'L' in npz header ints.

    Cleans up the 'L' in strings representing integers. Needed to allow npz
    headers produced in Python2 to be read in Python3.

    Parameters
    ----------
    s : byte string
        Npy file header.

    Returns
    -------
    header : str
        Cleaned up header.

    """
    import tokenize
    if sys.version_info[0] >= 3:
        from io import StringIO
    else:
        from StringIO import StringIO

    tokens = []
    last_token_was_number = False
    # Passing .read (rather than .readline) hands tokenize the whole header
    # as one "line"; that works here because npy headers are single-line.
    for token in tokenize.generate_tokens(StringIO(asstr(s)).read):
        token_type = token[0]
        token_string = token[1]
        if (last_token_was_number and
                token_type == tokenize.NAME and
                token_string == "L"):
            continue
        else:
            tokens.append(token)
        last_token_was_number = (token_type == tokenize.NUMBER)
    return tokenize.untokenize(tokens) 
Example #8
Source File: format.py    From Splunking-Crime with GNU Affero General Public License v3.0
def _filter_header(s):
    """Clean up 'L' in npz header ints.

    Cleans up the 'L' in strings representing integers. Needed to allow npz
    headers produced in Python2 to be read in Python3.

    Parameters
    ----------
    s : byte string
        Npy file header.

    Returns
    -------
    header : str
        Cleaned up header.

    """
    import tokenize
    if sys.version_info[0] >= 3:
        from io import StringIO
    else:
        from StringIO import StringIO

    tokens = []
    last_token_was_number = False
    # Passing .read (rather than .readline) hands tokenize the whole header
    # as one "line"; that works here because npy headers are single-line.
    for token in tokenize.generate_tokens(StringIO(asstr(s)).read):
        token_type = token[0]
        token_string = token[1]
        if (last_token_was_number and
                token_type == tokenize.NAME and
                token_string == "L"):
            continue
        else:
            tokens.append(token)
        last_token_was_number = (token_type == tokenize.NUMBER)
    return tokenize.untokenize(tokens) 
Example #9
Source File: tokens.py    From Splunking-Crime with GNU Affero General Public License v3.0
def test_python_tokenize():
    code = "a + (foo * -1)"
    tokens = list(python_tokenize(code))
    expected = [(tokenize.NAME, "a", Origin(code, 0, 1)),
                (tokenize.OP, "+", Origin(code, 2, 3)),
                (tokenize.OP, "(", Origin(code, 4, 5)),
                (tokenize.NAME, "foo", Origin(code, 5, 8)),
                (tokenize.OP, "*", Origin(code, 9, 10)),
                (tokenize.OP, "-", Origin(code, 11, 12)),
                (tokenize.NUMBER, "1", Origin(code, 12, 13)),
                (tokenize.OP, ")", Origin(code, 13, 14))]
    assert tokens == expected

    code2 = "a + (b"
    tokens2 = list(python_tokenize(code2))
    expected2 = [(tokenize.NAME, "a", Origin(code2, 0, 1)),
                 (tokenize.OP, "+", Origin(code2, 2, 3)),
                 (tokenize.OP, "(", Origin(code2, 4, 5)),
                 (tokenize.NAME, "b", Origin(code2, 5, 6))]
    assert tokens2 == expected2

    from nose.tools import assert_raises
    assert_raises(PatsyError, list, python_tokenize("a b # c"))

    from nose.tools import assert_raises
    assert_raises(PatsyError, list, python_tokenize("a b \"c")) 
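As an aside, nose is no longer maintained; under pytest (an assumption here, not what this project uses) the same negative checks would read:

import pytest

with pytest.raises(PatsyError):
    list(python_tokenize("a b # c"))
with pytest.raises(PatsyError):
    list(python_tokenize('a b "c'))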
Example #10
Source File: QuestParser.py    From Pirates-Online-Rewritten with BSD 3-Clause "New" or "Revised" License
def getLineOfTokens(gen):
    tokens = []
    nextNeg = 0
    token = gen.next()
    if token[0] == tokenize.ENDMARKER:
        return None
    while token[0] != tokenize.NEWLINE and token[0] != tokenize.NL:
        if token[0] == tokenize.COMMENT:
            pass
        elif token[0] == tokenize.OP and token[1] == '-':
            nextNeg = 1
        elif token[0] == tokenize.NUMBER:
            if nextNeg:
                tokens.append(-eval(token[1]))
                nextNeg = 0
            else:
                tokens.append(eval(token[1]))
        elif token[0] == tokenize.STRING:
            tokens.append(eval(token[1]))
        elif token[0] == tokenize.NAME:
            tokens.append(token[1])
        else:
            notify.warning('Ignored token type: %s on line: %s' % (tokenize.tok_name[token[0]], token[2][0]))
        token = gen.next()

    return tokens 
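Note this snippet is Python 2 (gen.next(); Python 3 spells it next(gen)), and notify is a logger defined elsewhere in the project. A hedged Python 3 adaptation of the same pattern, using ast.literal_eval instead of eval for safety:

import ast
import io
import tokenize

def line_of_tokens(gen):
    tokens = []
    negate = False
    for tok in gen:
        if tok.type in (tokenize.NEWLINE, tokenize.NL, tokenize.ENDMARKER):
            break
        if tok.type == tokenize.OP and tok.string == '-':
            negate = True  # remember a leading minus for the next NUMBER
        elif tok.type == tokenize.NUMBER:
            value = ast.literal_eval(tok.string)
            tokens.append(-value if negate else value)
            negate = False
        elif tok.type == tokenize.STRING:
            tokens.append(ast.literal_eval(tok.string))
        elif tok.type == tokenize.NAME:
            tokens.append(tok.string)
    return tokens

gen = tokenize.generate_tokens(io.StringIO("setHp 10 -3 'ok'\n").readline)
print(line_of_tokens(gen))  # ['setHp', 10, -3, 'ok']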
Example #11
Source File: format.py    From coffeegrindsize with MIT License
def _filter_header(s):
    """Clean up 'L' in npz header ints.

    Cleans up the 'L' in strings representing integers. Needed to allow npz
    headers produced in Python2 to be read in Python3.

    Parameters
    ----------
    s : byte string
        Npy file header.

    Returns
    -------
    header : str
        Cleaned up header.

    """
    import tokenize
    if sys.version_info[0] >= 3:
        from io import StringIO
    else:
        from StringIO import StringIO

    tokens = []
    last_token_was_number = False
    # adding newline as python 2.7.5 workaround
    string = asstr(s) + "\n"
    for token in tokenize.generate_tokens(StringIO(string).readline):
        token_type = token[0]
        token_string = token[1]
        if (last_token_was_number and
                token_type == tokenize.NAME and
                token_string == "L"):
            continue
        else:
            tokens.append(token)
        last_token_was_number = (token_type == tokenize.NUMBER)
    # removing newline (see above) as python 2.7.5 workaround
    return tokenize.untokenize(tokens)[:-1] 
Example #12
Source File: format.py    From Carnets with BSD 3-Clause "New" or "Revised" License
def _filter_header(s):
    """Clean up 'L' in npz header ints.

    Cleans up the 'L' in strings representing integers. Needed to allow npz
    headers produced in Python2 to be read in Python3.

    Parameters
    ----------
    s : byte string
        Npy file header.

    Returns
    -------
    header : str
        Cleaned up header.

    """
    import tokenize
    if sys.version_info[0] >= 3:
        from io import StringIO
    else:
        from StringIO import StringIO

    tokens = []
    last_token_was_number = False
    # adding newline as python 2.7.5 workaround
    string = asstr(s) + "\n"
    for token in tokenize.generate_tokens(StringIO(string).readline):
        token_type = token[0]
        token_string = token[1]
        if (last_token_was_number and
                token_type == tokenize.NAME and
                token_string == "L"):
            continue
        else:
            tokens.append(token)
        last_token_was_number = (token_type == tokenize.NUMBER)
    # removing newline (see above) as python 2.7.5 workaround
    return tokenize.untokenize(tokens)[:-1] 
Example #13
Source File: parser.py    From lax with GNU General Public License v2.0
def build(data):
    result = []
    for num, val, _, _, _  in split(StringIO(data).readline):
        if num == NUMBER: 
            result.extend([(NAME, 'Num'), (OP, '('),
            (STRING, str(val)), (OP, ')')])
        elif num == NAME and not val.startswith('_'):
            result.extend([(NAME, 'Chk'), 
            (OP, '('), (OP, "'"),
            (STRING, str(val)), (OP, "'"), (OP, ')')])
        else:
            result.append((num, val))
    return untokenize(result) 
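This snippet is not self-contained; it plausibly assumes imports along these lines, with split presumably an alias for tokenize.generate_tokens (all of this is inference, not taken from the lax source):

from io import StringIO
from tokenize import (NAME, NUMBER, OP, STRING,
                      generate_tokens as split, untokenize)

# build wraps numbers in Num(...) and non-underscore names in Chk('...'):
# build("x + 1") returns roughly "Chk ('x')+Num (1)" (untokenize's 2-tuple
# mode is loose about spacing and does not validate token text).
print(build("x + 1"))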
Example #14
Source File: format.py    From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License
def _filter_header(s):
    """Clean up 'L' in npz header ints.

    Cleans up the 'L' in strings representing integers. Needed to allow npz
    headers produced in Python2 to be read in Python3.

    Parameters
    ----------
    s : byte string
        Npy file header.

    Returns
    -------
    header : str
        Cleaned up header.

    """
    import tokenize
    if sys.version_info[0] >= 3:
        from io import StringIO
    else:
        from StringIO import StringIO

    tokens = []
    last_token_was_number = False
    # adding newline as python 2.7.5 workaround
    string = asstr(s) + "\n"
    for token in tokenize.generate_tokens(StringIO(string).readline):
        token_type = token[0]
        token_string = token[1]
        if (last_token_was_number and
                token_type == tokenize.NAME and
                token_string == "L"):
            continue
        else:
            tokens.append(token)
        last_token_was_number = (token_type == tokenize.NUMBER)
    # removing newline (see above) as python 2.7.5 workaround
    return tokenize.untokenize(tokens)[:-1] 
Example #15
Source File: reader.py    From pycodesuggest with MIT License
def preprocess(tokentype, tokenval):
    if tokentype == tokenize.NUMBER:
        return number_token

    elif tokentype == tokenize.INDENT:
        return "<indent>"

    elif tokentype == tokenize.DEDENT:
        return "<dedent>"

    # Need to replace spaces with some other character because the ngram processor
    # splits on spaces
    return tokenval.replace(" ", "§").replace("\n", "<newline>") 
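number_token is a module-level constant defined elsewhere in the project; a hedged stand-in and usage (the placeholder value is an assumption):

import tokenize

number_token = "<number>"  # hypothetical placeholder; the real value lives elsewhere

print(preprocess(tokenize.NUMBER, "3.14"))    # <number>
print(preprocess(tokenize.STRING, "'a b'"))   # 'a§b'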
Example #16
Source File: pyreader.py    From pycodesuggest with MIT License
def preprocess(tokentype, tokenval):
    if tokentype == tokenize.NUMBER:
        return number_token

    elif tokentype == tokenize.INDENT:
        return indent_token

    elif tokentype == tokenize.DEDENT:
        return dedent_token

    return tokenval 
Example #17
Source File: format.py    From twitter-stock-recommendation with MIT License
def _filter_header(s):
    """Clean up 'L' in npz header ints.

    Cleans up the 'L' in strings representing integers. Needed to allow npz
    headers produced in Python2 to be read in Python3.

    Parameters
    ----------
    s : byte string
        Npy file header.

    Returns
    -------
    header : str
        Cleaned up header.

    """
    import tokenize
    if sys.version_info[0] >= 3:
        from io import StringIO
    else:
        from StringIO import StringIO

    tokens = []
    last_token_was_number = False
    # adding newline as python 2.7.5 workaround
    string = asstr(s) + "\n"
    for token in tokenize.generate_tokens(StringIO(string).readline):
        token_type = token[0]
        token_string = token[1]
        if (last_token_was_number and
                token_type == tokenize.NAME and
                token_string == "L"):
            continue
        else:
            tokens.append(token)
        last_token_was_number = (token_type == tokenize.NUMBER)
    # removing newline (see above) as python 2.7.5 workaround
    return tokenize.untokenize(tokens)[:-1] 
Example #18
Source File: format.py    From keras-lambda with MIT License
def _filter_header(s):
    """Clean up 'L' in npz header ints.

    Cleans up the 'L' in strings representing integers. Needed to allow npz
    headers produced in Python2 to be read in Python3.

    Parameters
    ----------
    s : byte string
        Npy file header.

    Returns
    -------
    header : str
        Cleaned up header.

    """
    import tokenize
    if sys.version_info[0] >= 3:
        from io import StringIO
    else:
        from StringIO import StringIO

    tokens = []
    last_token_was_number = False
    # Passing .read (rather than .readline) hands tokenize the whole header
    # as one "line"; that works here because npy headers are single-line.
    for token in tokenize.generate_tokens(StringIO(asstr(s)).read):
        token_type = token[0]
        token_string = token[1]
        if (last_token_was_number and
                token_type == tokenize.NAME and
                token_string == "L"):
            continue
        else:
            tokens.append(token)
        last_token_was_number = (token_type == tokenize.NUMBER)
    return tokenize.untokenize(tokens) 
Example #19
Source File: format.py    From auto-alt-text-lambda-api with MIT License
def _filter_header(s):
    """Clean up 'L' in npz header ints.

    Cleans up the 'L' in strings representing integers. Needed to allow npz
    headers produced in Python2 to be read in Python3.

    Parameters
    ----------
    s : byte string
        Npy file header.

    Returns
    -------
    header : str
        Cleaned up header.

    """
    import tokenize
    if sys.version_info[0] >= 3:
        from io import StringIO
    else:
        from StringIO import StringIO

    tokens = []
    last_token_was_number = False
    # Passing .read (rather than .readline) hands tokenize the whole header
    # as one "line"; that works here because npy headers are single-line.
    for token in tokenize.generate_tokens(StringIO(asstr(s)).read):
        token_type = token[0]
        token_string = token[1]
        if (last_token_was_number and
                token_type == tokenize.NAME and
                token_string == "L"):
            continue
        else:
            tokens.append(token)
        last_token_was_number = (token_type == tokenize.NUMBER)
    return tokenize.untokenize(tokens) 
Example #20
Source File: python3.py    From linter-pylama with MIT License
def process_tokens(self, tokens):
        for idx, (tok_type, token, start, _, _) in enumerate(tokens):
            if tok_type == tokenize.NUMBER:
                if token.lower().endswith('l'):
                    # This has a different semantic than lowercase-l-suffix.
                    self.add_message('long-suffix', line=start[0])
                elif _is_old_octal(token):
                    self.add_message('old-octal-literal', line=start[0])
            if tokens[idx][1] == '<>':
                self.add_message('old-ne-operator', line=tokens[idx][2][0])
            if tok_type == tokenize.STRING and token.startswith('b'):
                if any(elem for elem in token if ord(elem) > 127):
                    self.add_message('non-ascii-bytes-literal', line=start[0])
            if tok_type == tokenize.STRING and token.startswith('ur'):
                self.add_message('invalid-unicode-literal', line=start[0]) 
Example #21
Source File: format.py    From recruit with Apache License 2.0
def _filter_header(s):
    """Clean up 'L' in npz header ints.

    Cleans up the 'L' in strings representing integers. Needed to allow npz
    headers produced in Python2 to be read in Python3.

    Parameters
    ----------
    s : byte string
        Npy file header.

    Returns
    -------
    header : str
        Cleaned up header.

    """
    import tokenize
    if sys.version_info[0] >= 3:
        from io import StringIO
    else:
        from StringIO import StringIO

    tokens = []
    last_token_was_number = False
    # adding newline as python 2.7.5 workaround
    string = asstr(s) + "\n"
    for token in tokenize.generate_tokens(StringIO(string).readline):
        token_type = token[0]
        token_string = token[1]
        if (last_token_was_number and
                token_type == tokenize.NAME and
                token_string == "L"):
            continue
        else:
            tokens.append(token)
        last_token_was_number = (token_type == tokenize.NUMBER)
    # removing newline (see above) as python 2.7.5 workaround
    return tokenize.untokenize(tokens)[:-1] 
Example #22
Source File: python3.py    From python-netsurv with MIT License
def process_tokens(self, tokens):
        for idx, (tok_type, token, start, _, _) in enumerate(tokens):
            if tok_type == tokenize.NUMBER:
                if token.lower().endswith("l"):
                    # This has a different semantic than lowercase-l-suffix.
                    self.add_message("long-suffix", line=start[0])
                elif _is_old_octal(token):
                    self.add_message("old-octal-literal", line=start[0])
            if tokens[idx][1] == "<>":
                self.add_message("old-ne-operator", line=tokens[idx][2][0])
            if tok_type == tokenize.STRING and token.startswith("b"):
                if any(elem for elem in token if ord(elem) > 127):
                    self.add_message("non-ascii-bytes-literal", line=start[0]) 
Example #23
Source File: format.py    From lambda-packs with MIT License
def _filter_header(s):
    """Clean up 'L' in npz header ints.

    Cleans up the 'L' in strings representing integers. Needed to allow npz
    headers produced in Python2 to be read in Python3.

    Parameters
    ----------
    s : byte string
        Npy file header.

    Returns
    -------
    header : str
        Cleaned up header.

    """
    import tokenize
    if sys.version_info[0] >= 3:
        from io import StringIO
    else:
        from StringIO import StringIO

    tokens = []
    last_token_was_number = False
    # adding newline as python 2.7.5 workaround
    string = asstr(s) + "\n"
    for token in tokenize.generate_tokens(StringIO(string).readline):
        token_type = token[0]
        token_string = token[1]
        if (last_token_was_number and
                token_type == tokenize.NAME and
                token_string == "L"):
            continue
        else:
            tokens.append(token)
        last_token_was_number = (token_type == tokenize.NUMBER)
    # removing newline (see above) as python 2.7.5 workaround
    return tokenize.untokenize(tokens)[:-1] 
Example #24
Source File: format.py    From lambda-packs with MIT License
def _filter_header(s):
    """Clean up 'L' in npz header ints.

    Cleans up the 'L' in strings representing integers. Needed to allow npz
    headers produced in Python2 to be read in Python3.

    Parameters
    ----------
    s : byte string
        Npy file header.

    Returns
    -------
    header : str
        Cleaned up header.

    """
    import tokenize
    if sys.version_info[0] >= 3:
        from io import StringIO
    else:
        from StringIO import StringIO

    tokens = []
    last_token_was_number = False
    # Passing .read (rather than .readline) hands tokenize the whole header
    # as one "line"; that works here because npy headers are single-line.
    for token in tokenize.generate_tokens(StringIO(asstr(s)).read):
        token_type = token[0]
        token_string = token[1]
        if (last_token_was_number and
                token_type == tokenize.NAME and
                token_string == "L"):
            continue
        else:
            tokens.append(token)
        last_token_was_number = (token_type == tokenize.NUMBER)
    return tokenize.untokenize(tokens) 
Example #25
Source File: codecontainer.py    From ironpython2 with Apache License 2.0
def _ProcessToken(self, type, token, spos, epos, line):
        srow, scol = spos
        erow, ecol = epos
        self.GetText() # Prime us.
        linenum = srow - 1 # Lines zero based for us too.
        realCharPos = self.lineOffsets[linenum] + scol
        numskipped = realCharPos - self.lastPos
        if numskipped==0:
            pass
        elif numskipped==1:
            self.attrs.append(axdebug.SOURCETEXT_ATTR_COMMENT)
        else:
            self.attrs.append((axdebug.SOURCETEXT_ATTR_COMMENT, numskipped))
        kwSize = len(token)
        self.lastPos = realCharPos + kwSize
        attr = 0

        if type==tokenize.NAME:
            if token in _keywords:
                attr = axdebug.SOURCETEXT_ATTR_KEYWORD
        elif type==tokenize.STRING:
            attr = axdebug.SOURCETEXT_ATTR_STRING
        elif type==tokenize.NUMBER:
            attr = axdebug.SOURCETEXT_ATTR_NUMBER
        elif type==tokenize.OP:
            attr = axdebug.SOURCETEXT_ATTR_OPERATOR
        elif type==tokenize.COMMENT:
            attr = axdebug.SOURCETEXT_ATTR_COMMENT
        # else attr remains zero...
        if kwSize==0:
            pass
        elif kwSize==1:
            self.attrs.append(attr)
        else:
            self.attrs.append((attr, kwSize)) 
Example #26
Source File: format.py    From Fluid-Designer with GNU General Public License v3.0
def _filter_header(s):
    """Clean up 'L' in npz header ints.

    Cleans up the 'L' in strings representing integers. Needed to allow npz
    headers produced in Python2 to be read in Python3.

    Parameters
    ----------
    s : byte string
        Npy file header.

    Returns
    -------
    header : str
        Cleaned up header.

    """
    import tokenize
    if sys.version_info[0] >= 3:
        from io import StringIO
    else:
        from StringIO import StringIO

    tokens = []
    last_token_was_number = False
    # Passing .read (rather than .readline) hands tokenize the whole header
    # as one "line"; that works here because npy headers are single-line.
    for token in tokenize.generate_tokens(StringIO(asstr(s)).read):
        token_type = token[0]
        token_string = token[1]
        if (last_token_was_number and
                token_type == tokenize.NAME and
                token_string == "L"):
            continue
        else:
            tokens.append(token)
        last_token_was_number = (token_type == tokenize.NUMBER)
    return tokenize.untokenize(tokens) 
Example #27
Source File: format.py    From vnpy_crypto with MIT License
def _filter_header(s):
    """Clean up 'L' in npz header ints.

    Cleans up the 'L' in strings representing integers. Needed to allow npz
    headers produced in Python2 to be read in Python3.

    Parameters
    ----------
    s : byte string
        Npy file header.

    Returns
    -------
    header : str
        Cleaned up header.

    """
    import tokenize
    if sys.version_info[0] >= 3:
        from io import StringIO
    else:
        from StringIO import StringIO

    tokens = []
    last_token_was_number = False
    # adding newline as python 2.7.5 workaround
    string = asstr(s) + "\n"
    for token in tokenize.generate_tokens(StringIO(string).readline):
        token_type = token[0]
        token_string = token[1]
        if (last_token_was_number and
                token_type == tokenize.NAME and
                token_string == "L"):
            continue
        else:
            tokens.append(token)
        last_token_was_number = (token_type == tokenize.NUMBER)
    # removing newline (see above) as python 2.7.5 workaround
    return tokenize.untokenize(tokens)[:-1] 
Example #28
Source File: tokens.py    From vnpy_crypto with MIT License
def test_python_tokenize():
    code = "a + (foo * -1)"
    tokens = list(python_tokenize(code))
    expected = [(tokenize.NAME, "a", Origin(code, 0, 1)),
                (tokenize.OP, "+", Origin(code, 2, 3)),
                (tokenize.OP, "(", Origin(code, 4, 5)),
                (tokenize.NAME, "foo", Origin(code, 5, 8)),
                (tokenize.OP, "*", Origin(code, 9, 10)),
                (tokenize.OP, "-", Origin(code, 11, 12)),
                (tokenize.NUMBER, "1", Origin(code, 12, 13)),
                (tokenize.OP, ")", Origin(code, 13, 14))]
    assert tokens == expected

    code2 = "a + (b"
    tokens2 = list(python_tokenize(code2))
    expected2 = [(tokenize.NAME, "a", Origin(code2, 0, 1)),
                 (tokenize.OP, "+", Origin(code2, 2, 3)),
                 (tokenize.OP, "(", Origin(code2, 4, 5)),
                 (tokenize.NAME, "b", Origin(code2, 5, 6))]
    assert tokens2 == expected2

    from nose.tools import assert_raises
    assert_raises(PatsyError, list, python_tokenize("a b # c"))

    from nose.tools import assert_raises
    assert_raises(PatsyError, list, python_tokenize("a b \"c")) 
Example #29
Source File: format.py    From Mastering-Elasticsearch-7.0 with MIT License
def _filter_header(s):
    """Clean up 'L' in npz header ints.

    Cleans up the 'L' in strings representing integers. Needed to allow npz
    headers produced in Python2 to be read in Python3.

    Parameters
    ----------
    s : byte string
        Npy file header.

    Returns
    -------
    header : str
        Cleaned up header.

    """
    import tokenize
    if sys.version_info[0] >= 3:
        from io import StringIO
    else:
        from StringIO import StringIO

    tokens = []
    last_token_was_number = False
    # adding newline as python 2.7.5 workaround
    string = asstr(s) + "\n"
    for token in tokenize.generate_tokens(StringIO(string).readline):
        token_type = token[0]
        token_string = token[1]
        if (last_token_was_number and
                token_type == tokenize.NAME and
                token_string == "L"):
            continue
        else:
            tokens.append(token)
        last_token_was_number = (token_type == tokenize.NUMBER)
    # removing newline (see above) as python 2.7.5 workaround
    return tokenize.untokenize(tokens)[:-1]