Python tokenize.generate_tokens() Examples

The following are 22 code examples of tokenize.generate_tokens(), drawn from open-source projects. The originating project and source file are noted above each example. You may also want to check out the other available functions and classes of the tokenize module.
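For orientation before the project examples: tokenize.generate_tokens() takes a readline callable (not a string or an open file) and yields 5-tuples of (type, string, start, end, line). A minimal, self-contained sketch of the calling pattern, using an illustrative source string:

import io
import tokenize

source = "total = price * quantity  # compute cost\n"

# generate_tokens() expects a readline callable that returns one source
# line per call; io.StringIO(...).readline is the usual choice for strings.
for tok in tokenize.generate_tokens(io.StringIO(source).readline):
    # Each token is a TokenInfo 5-tuple: (type, string, start, end, line).
    print(tokenize.tok_name[tok.type], repr(tok.string), tok.start, tok.end)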
Example #1
Source File: cgitb.py    From BinderFilter with MIT License
def scanvars(reader, frame, locals):
    """Scan one logical line of Python and look up values of variables used."""
    vars, lasttoken, parent, prefix, value = [], None, None, '', __UNDEF__
    for ttype, token, start, end, line in tokenize.generate_tokens(reader):
        if ttype == tokenize.NEWLINE: break
        if ttype == tokenize.NAME and token not in keyword.kwlist:
            if lasttoken == '.':
                if parent is not __UNDEF__:
                    value = getattr(parent, token, __UNDEF__)
                    vars.append((prefix + token, prefix, value))
            else:
                where, value = lookup(token, frame, locals)
                vars.append((token, where, value))
        elif token == '.':
            prefix += lasttoken + '.'
            parent = value
        else:
            parent, prefix = None, ''
        lasttoken = token
    return vars 
Example #2
Source File: trace.py    From Computable with MIT License
def find_strings(filename):
    """Return a dict of possible docstring positions.

    The dict maps line numbers to 1.  There is an entry for each
    line that contains only a string or a part of a triple-quoted
    string.
    """
    d = {}
    # If the first token is a string, then it's the module docstring.
    # Add this special case so that the test in the loop passes.
    prev_ttype = token.INDENT
    f = open(filename)
    for ttype, tstr, start, end, line in tokenize.generate_tokens(f.readline):
        if ttype == token.STRING:
            if prev_ttype == token.INDENT:
                sline, scol = start
                eline, ecol = end
                for i in range(sline, eline + 1):
                    d[i] = 1
        prev_ttype = ttype
    f.close()
    return d 
Example #3
Source File: autopep8.py    From python-netsurv with MIT License
def fix_e402(self, result):
        (line_index, offset, target) = get_index_offset_contents(result,
                                                                 self.source)
        for i in range(1, 100):
            line = "".join(self.source[line_index:line_index+i])
            try:
                generate_tokens("".join(line))
            except (SyntaxError, tokenize.TokenError):
                continue
            break
        if not (target in self.imports and self.imports[target] != line_index):
            mod_offset = get_module_imports_on_top_of_file(self.source,
                                                           line_index)
            self.source[mod_offset] = line + self.source[mod_offset]
        for offset in range(i):
            self.source[line_index+offset] = '' 
Example #4
Source File: importer.py    From importmagic with BSD 2-Clause "Simplified" License
def _parse(self, source):
        reader = StringIO(source)
        # parse until EOF or TokenError (allows incomplete modules)
        tokens = []
        try:
            tokens.extend(tokenize.generate_tokens(reader.readline))
        except tokenize.TokenError:
            # TokenError happens always at EOF, for unclosed strings or brackets.
            # We don't care about that here, since we still can recover the whole
            # source code.
            pass
        self._tokens = tokens
        it = Iterator(self._tokens)
        self._imports_begin, self._imports_end = self._find_import_range(it)
        it = Iterator(self._tokens, start=self._imports_begin, end=self._imports_end)
        self._parse_imports(it) 
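The TokenError-at-EOF behavior described in the comment above is easy to reproduce; a minimal illustration (not taken from the importmagic source):

import io
import tokenize

try:
    # An unclosed bracket makes the tokenizer expect a continuation line
    # that never arrives, so it raises TokenError when the input ends.
    list(tokenize.generate_tokens(io.StringIO("f(a, b").readline))
except tokenize.TokenError as exc:
    print(exc.args[0])  # e.g. "EOF in multi-line statement"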
Example #5
Source File: __init__.py    From pyta with GNU General Public License v3.0
def _verify_pre_check(filepath):
    """Check student code for certain issues."""
    # Make sure the program doesn't crash for students.
    # Could use some improvement for better logging and error reporting.
    try:
        # Check for inline "pylint:" comment, which may indicate a student
        # trying to disable a check.
        with tokenize.open(os.path.expanduser(filepath)) as f:
            for tok_type, content, _, _, _ in tokenize.generate_tokens(f.readline):
                if tok_type != tokenize.COMMENT:
                    continue
                match = pylint.constants.OPTION_RGX.search(content)
                if match is not None:
                    print('[ERROR] String "pylint:" found in comment. ' +
                          'No check run on file `{}`.\n'.format(filepath))
                    return False
    except IndentationError as e:
        print('[ERROR] python_ta could not check your code due to an ' +
              'indentation error at line {}.'.format(e.lineno))
        return False
    except tokenize.TokenError as e:
        print('[ERROR] python_ta could not check your code due to a ' +
              'syntax error in your file.')
        return False
    return True 
Example #6
Source File: autopep8.py    From python-netsurv with MIT License
def _find_logical(source_lines):
    # Build indexes of the starts and ends of all logical lines.
    logical_start = []
    logical_end = []
    last_newline = True
    parens = 0
    for t in generate_tokens(''.join(source_lines)):
        if t[0] in [tokenize.COMMENT, tokenize.DEDENT,
                    tokenize.INDENT, tokenize.NL,
                    tokenize.ENDMARKER]:
            continue
        if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]:
            last_newline = True
            logical_end.append((t[3][0] - 1, t[2][1]))
            continue
        if last_newline and not parens:
            logical_start.append((t[2][0] - 1, t[2][1]))
            last_newline = False
        if t[0] == tokenize.OP:
            if t[1] in '([{':
                parens += 1
            elif t[1] in '}])':
                parens -= 1
    return (logical_start, logical_end) 
Example #7
Source File: processor.py    From linter-pylama with MIT License
def file_tokens(self):
        """The complete set of tokens for a file.

        Accessing this attribute *may* raise an InvalidSyntax exception.

        :raises: flake8.exceptions.InvalidSyntax
        """
        if self._file_tokens is None:
            line_iter = iter(self.lines)
            try:
                self._file_tokens = list(tokenize.generate_tokens(
                    lambda: next(line_iter)
                ))
            except tokenize.TokenError as exc:
                # TokenError has no .message attribute on Python 3; args[0] holds the message.
                raise exceptions.InvalidSyntax(exc.args[0], exception=exc)

        return self._file_tokens 
Example #8
Source File: check_whitespace.py    From D-VAE with MIT License
def get_parse_error(code):
    """
    Checks code for ambiguous tabs or other basic parsing issues.

    :param code: a string containing a file's worth of Python code
    :returns: a string containing a description of the first parse error encountered,
              or None if the code is ok
    """
    # note that this uses non-public elements from stdlib's tabnanny, because tabnanny
    # is (very frustratingly) written only to be used as a script, but using it that way
    # in this context would require writing temporary files, running subprocesses, blah blah blah
    code_buffer = StringIO(code)
    try:
        tabnanny.process_tokens(tokenize.generate_tokens(code_buffer.readline))
    except tokenize.TokenError as err:
        return "Could not parse code: %s" % err
    except IndentationError as err:
        return "Indentation error: %s" % err
    except tabnanny.NannyNag as err:
        return "Ambiguous tab at line %d; line is '%s'." % (err.get_lineno(), err.get_line())
    return None 
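A usage sketch for the helper above; the code string is illustrative:

# A dedent that matches no outer indentation level is rejected by the
# tokenizer itself, so get_parse_error() reports it as an indentation error.
print(get_parse_error("if x:\n    y = 1\n  z = 2\n"))
# e.g. "Indentation error: unindent does not match any outer indentation level ..."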
Example #9
Source File: source.py    From py with MIT License
def deindent(lines, offset=None):
    if offset is None:
        for line in lines:
            line = line.expandtabs()
            s = line.lstrip()
            if s:
                offset = len(line)-len(s)
                break
        else:
            offset = 0
    if offset == 0:
        return list(lines)
    newlines = []
    def readline_generator(lines):
        for line in lines:
            yield line + '\n'
        while True:
            yield ''

    it = readline_generator(lines)

    try:
        for _, _, (sline, _), (eline, _), _ in tokenize.generate_tokens(lambda: next(it)):
            if sline > len(lines):
                break # End of input reached
            if sline > len(newlines):
                line = lines[sline - 1].expandtabs()
                if line.lstrip() and line[:offset].isspace():
                    line = line[offset:] # Deindent
                newlines.append(line)

            for i in range(sline, eline):
                # Don't deindent continuing lines of
                # multiline tokens (i.e. multiline strings)
                newlines.append(lines[i])
    except (IndentationError, tokenize.TokenError):
        pass
    # Add any lines we didn't see. E.g. if an exception was raised.
    newlines.extend(lines[len(newlines):])
    return newlines 
Example #10
Source File: utils.py    From typed-argument-parser with MIT License
def tokenize_source(obj: object) -> Generator:
    """Returns a generator for the tokens of the object's source code."""
    source = inspect.getsource(obj)
    token_generator = tokenize.generate_tokens(StringIO(source).readline)

    return token_generator 
Example #11
Source File: tokens.py    From vnpy_crypto with MIT License
def normalize_token_spacing(code):
    tokens = [(t[0], t[1])
              for t in tokenize.generate_tokens(StringIO(code).readline)]
    return pretty_untokenize(tokens) 
Example #12
Source File: tokens.py    From vnpy_crypto with MIT License
def python_tokenize(code):
    # Since formulas can only contain Python expressions, and Python
    # expressions cannot meaningfully contain newlines, we'll just remove all
    # the newlines up front to avoid any complications:
    code = code.replace("\n", " ").strip()
    it = tokenize.generate_tokens(StringIO(code).readline)
    try:
        for (pytype, string, (_, start), (_, end), code) in it:
            if pytype == tokenize.ENDMARKER:
                break
            origin = Origin(code, start, end)
            assert pytype not in (tokenize.NL, tokenize.NEWLINE)
            if pytype == tokenize.ERRORTOKEN:
                raise PatsyError("error tokenizing input "
                                 "(maybe an unclosed string?)",
                                 origin)
            if pytype == tokenize.COMMENT:
                raise PatsyError("comments are not allowed", origin)
            yield (pytype, string, origin)
        else: # pragma: no cover
            raise ValueError("stream ended without ENDMARKER?!?")
    except tokenize.TokenError as e:
        # TokenError is raised iff the tokenizer thinks that there is
        # some sort of multi-line construct in progress (e.g., an
        # unclosed parentheses, which in Python lets a virtual line
        # continue past the end of the physical line), and it hits the
        # end of the source text. We have our own error handling for
        # such cases, so just treat this as an end-of-stream.
        # 
        # Just in case someone adds some other error case:
        assert e.args[0].startswith("EOF in multi-line")
        return 
Example #13
Source File: expr.py    From vnpy_crypto with MIT License
def tokenize_string(source):
    """Tokenize a Python source code string.

    Parameters
    ----------
    source : str
        A Python source code string
    """
    line_reader = StringIO(source).readline
    for toknum, tokval, _, _, _ in tokenize.generate_tokens(line_reader):
        yield toknum, tokval 
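A usage sketch for the generator above (the expression is illustrative, and tokenize is assumed to be imported):

for toknum, tokval in tokenize_string("df.price * 2"):
    print(tokenize.tok_name[toknum], tokval)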
Example #14
Source File: template.py    From nightmare with GNU General Public License v2.0
def __init__(self, text):
        self.text = text
        readline = iter([text]).next
        self.tokens = tokenize.generate_tokens(readline)
        self.index = 0 
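Note that iter([text]).next is Python 2 spelling; Python 3 renamed the method to __next__, and io.StringIO(text).readline is the more idiomatic readline callable anyway. A Python 3 sketch of the same constructor (the class name is hypothetical):

import io
import tokenize

class Tokens:
    def __init__(self, text):
        self.text = text
        # io.StringIO supplies the readline callable generate_tokens() expects.
        self.tokens = tokenize.generate_tokens(io.StringIO(text).readline)
        self.index = 0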
Example #15
Source File: autopep8.py    From python-netsurv with MIT License
def fix_w605(self, result):
        (line_index, _, target) = get_index_offset_contents(result,
                                                            self.source)
        try:
            tokens = list(generate_tokens(target))
        except (SyntaxError, tokenize.TokenError):
            return
        for (pos, _msg) in get_w605_position(tokens):
            self.source[line_index] = '{}r{}'.format(
                target[:pos], target[pos:]) 
Example #16
Source File: autopep8.py    From python-netsurv with MIT License
def join_logical_line(logical_line):
    """Return single line based on logical line input."""
    indentation = _get_indentation(logical_line)

    return indentation + untokenize_without_newlines(
        generate_tokens(logical_line.lstrip())) + '\n' 
Example #17
Source File: pycodestyle.py    From python-netsurv with MIT License
def generate_tokens(self):
        """Tokenize file, run physical line checks and yield tokens."""
        if self._io_error:
            self.report_error(1, 0, 'E902 %s' % self._io_error, readlines)
        tokengen = tokenize.generate_tokens(self.readline)
        try:
            for token in tokengen:
                if token[2][0] > self.total_lines:
                    return
                self.noqa = token[4] and noqa(token[4])
                self.maybe_check_physical(token)
                yield token
        except (SyntaxError, tokenize.TokenError):
            self.report_invalid_syntax() 
Example #18
Source File: autopep8.py    From python-netsurv with MIT License
def _get_indentword(source):
    """Return indentation type."""
    indent_word = '    '  # Default in case source has no indentation
    try:
        for t in generate_tokens(source):
            if t[0] == token.INDENT:
                indent_word = t[1]
                break
    except (SyntaxError, tokenize.TokenError):
        pass
    return indent_word 
Example #19
Source File: autopep8.py    From python-netsurv with MIT License
def multiline_string_lines(source, include_docstrings=False):
    """Return line numbers that are within multiline strings.

    The line numbers are indexed at 1.

    Docstrings are ignored unless include_docstrings is True.

    """
    line_numbers = set()
    previous_token_type = ''
    try:
        for t in generate_tokens(source):
            token_type = t[0]
            start_row = t[2][0]
            end_row = t[3][0]

            if token_type == tokenize.STRING and start_row != end_row:
                if (
                    include_docstrings or
                    previous_token_type != tokenize.INDENT
                ):
                    # We increment by one since we want the contents of the
                    # string.
                    line_numbers |= set(range(1 + start_row, 1 + end_row))

            previous_token_type = token_type
    except (SyntaxError, tokenize.TokenError):
        pass

    return line_numbers 
Example #20
Source File: source.py    From python-netsurv with MIT License
def getstatementrange_ast(lineno, source, assertion=False, astnode=None):
    if astnode is None:
        content = str(source)
        # See #4260:
        # don't produce duplicate warnings when compiling source to find ast
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            astnode = compile(content, "source", "exec", _AST_FLAG)

    start, end = get_statement_startend2(lineno, astnode)
    # we need to correct the end:
    # - ast-parsing strips comments
    # - there might be empty lines
    # - we might have lesser indented code blocks at the end
    if end is None:
        end = len(source.lines)

    if end > start + 1:
        # make sure we don't span differently indented code blocks
        # by using the BlockFinder helper which inspect.getsource() itself uses
        block_finder = inspect.BlockFinder()
        # if we start with an indented line, put blockfinder to "started" mode
        block_finder.started = source.lines[start][0].isspace()
        it = ((x + "\n") for x in source.lines[start:end])
        try:
            for tok in tokenize.generate_tokens(lambda: next(it)):
                block_finder.tokeneater(*tok)
        except (inspect.EndOfBlock, IndentationError):
            end = block_finder.last + start
        except Exception:
            pass

    # the end might still point to a comment or empty line, correct it
    while end:
        line = source.lines[end - 1].lstrip()
        if line.startswith("#") or not line:
            end -= 1
        else:
            break
    return astnode, start, end 
Example #21
Source File: autopep8.py    From python-netsurv with MIT License
def generate_tokens(self, text):
        """A stand-in for tokenize.generate_tokens()."""
        if text != self.last_text:
            string_io = io.StringIO(text)
            self.last_tokens = list(
                tokenize.generate_tokens(string_io.readline)
            )
            self.last_text = text
        return self.last_tokens 
Example #22
Source File: autopep8.py    From python-netsurv with MIT License
def commented_out_code_lines(source):
    """Return line numbers of comments that are likely code.

    Commented-out code is bad practice, but modifying it just adds even
    more clutter.

    """
    line_numbers = []
    try:
        for t in generate_tokens(source):
            token_type = t[0]
            token_string = t[1]
            start_row = t[2][0]
            line = t[4]

            # Ignore inline comments.
            if not line.lstrip().startswith('#'):
                continue

            if token_type == tokenize.COMMENT:
                stripped_line = token_string.lstrip('#').strip()
                if (
                    ' ' in stripped_line and
                    '#' not in stripped_line and
                    check_syntax(stripped_line)
                ):
                    line_numbers.append(start_row)
    except (SyntaxError, tokenize.TokenError):
        pass

    return line_numbers