Python tokenize.tokenize() Examples

The following are 30 code examples of tokenize.tokenize(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the tokenize module, or try the search function.
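Before the project examples, here is a minimal sketch of driving tokenize.tokenize() directly: it takes the readline method of a binary stream and yields TokenInfo tuples, starting with an ENCODING token.

import io
import tokenize

source = "x = 1  # a comment\n"
# tokenize.tokenize() expects bytes, so wrap the source in a BytesIO
for tok in tokenize.tokenize(io.BytesIO(source.encode("utf-8")).readline):
    print(tokenize.tok_name[tok.type], repr(tok.string))
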
Example #1
Source File: pythondoc.py    From InternationalizationScript-iOS with MIT License
def skip_decorator(self, type, token, start, end, line):
        if token == "(":
            self.decorator_parens = self.decorator_parens + 1
        elif token == ")":
            self.decorator_parens = self.decorator_parens - 1
        if self.decorator_parens or type != tokenize.NEWLINE:
            return self.skip_decorator
        return self.process_subject

    ##
    # (Token handler helper) Processes a PythonDoc comment.  This
    # method creates an "info" element based on the current comment,
    # and attaches it to the current subject element.
    #
    # @param subject_name Subject name (or None if the name is not known).
    # @param subject_elem The current subject element.
    # @return The info element.  Note that this element has already
    #     been attached to the subject element.
    # @defreturn Element 
Example #2
Source File: AutoIndent.py    From ironpython2 with Apache License 2.0
def run(self):
        OPENERS=('class', 'def', 'for', 'if', 'try', 'while')
        INDENT=tokenize.INDENT
        NAME=tokenize.NAME
                   
        save_tabsize = tokenize.tabsize
        tokenize.tabsize = self.tabwidth
        try:
            try:
                for (typ, token, start, end, line) in token_generator(self.readline):
                    if typ == NAME and token in OPENERS:
                        self.blkopenline = line
                    elif typ == INDENT and self.blkopenline:
                        self.indentedline = line
                        break

            except (tokenize.TokenError, IndentationError):
                # since we cut off the tokenizer early, we can trigger
                # spurious errors
                pass
        finally:
            tokenize.tabsize = save_tabsize
        return self.blkopenline, self.indentedline 
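A simpler sketch of the same block-opener search using the generator API (the function name is illustrative, not part of the ironpython2 file):

import io
import tokenize

OPENERS = ('class', 'def', 'for', 'if', 'try', 'while')

def find_block_opener(source):
    opener_line = None
    try:
        for typ, token, start, end, line in tokenize.generate_tokens(
                io.StringIO(source).readline):
            if typ == tokenize.NAME and token in OPENERS:
                opener_line = line
            elif typ == tokenize.INDENT and opener_line:
                return opener_line, line  # block opener and first indented line
    except (tokenize.TokenError, IndentationError):
        pass
    return opener_line, None
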
Example #3
Source File: config_scope.py    From sacred with MIT License
def find_doc_for(ast_entry, body_lines):
    lineno = ast_entry.lineno - 1
    line_io = io.BytesIO(body_lines[lineno].encode())
    try:
        tokens = tokenize(line_io.readline) or []
        line_comments = [t.string for t in tokens if t.type == COMMENT]

        if line_comments:
            formatted_lcs = [l[1:].strip() for l in line_comments]
            filtered_lcs = [l for l in formatted_lcs if not is_ignored(l)]
            if filtered_lcs:
                return filtered_lcs[0]
    except TokenError:
        pass

    lineno -= 1
    while lineno >= 0:
        if iscomment(body_lines[lineno]):
            comment = body_lines[lineno].strip("# ")
            if not is_ignored(comment):
                return comment
        if not body_lines[lineno].strip() == "":
            return None
        lineno -= 1
    return None 
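The comment-extraction core of this example can be reproduced on its own; a hedged sketch (the helper name is hypothetical) that tokenizes a single source line and returns its comment strings:

import io
from tokenize import COMMENT, TokenError, tokenize

def comments_on_line(line):
    try:
        return [t.string for t in tokenize(io.BytesIO(line.encode()).readline)
                if t.type == COMMENT]
    except TokenError:
        return []

# comments_on_line("x = 1  # initial value\n") -> ['# initial value']
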
Example #4
Source File: gftools-rangify.py    From gftools with Apache License 2.0
def main():
  if len(sys.argv) != 2:
    sys.exit("Usage: rangify <nam file>")

  codepoints_data = list(tokenize.tokenize(open(sys.argv[1], 'rb').readline))
  codepoints = get_codepoints(codepoints_data)
  codepoints.sort()

  seqs = []
  seq = (None,)
  for cp in codepoints:
    if seq[0] is None:
      seq = (cp,cp)
    elif seq[1] == cp - 1:
      seq = (seq[0], cp)
    else:
      seqs.append(seq)
      seq = (None,)

  for seq in seqs:
    print(seq) 
Example #5
Source File: reindent.py    From D-VAE with MIT License
def __init__(self, f):
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = [] 
Example #6
Source File: pygettext.py    From oss-ftp with MIT License
def __waiting(self, ttype, tstring, lineno):
        opts = self.__options
        # Do docstring extractions, if enabled
        if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
            # module docstring?
            if self.__freshmodule:
                if ttype == tokenize.STRING:
                    self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                    self.__freshmodule = 0
                elif ttype not in (tokenize.COMMENT, tokenize.NL):
                    self.__freshmodule = 0
                return
            # class docstring?
            if ttype == tokenize.NAME and tstring in ('class', 'def'):
                self.__state = self.__suiteseen
                return
        if ttype == tokenize.NAME and tstring in opts.keywords:
            self.__state = self.__keywordseen 
Example #7
Source File: pygettext.py    From oss-ftp with MIT License
def __openseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == ')':
            # We've seen the last of the translatable strings.  Record the
            # line number of the first line of the strings and update the list
            # of messages seen.  Reset state for the next batch.  If there
            # were no strings inside _(), then just ignore this entry.
            if self.__data:
                self.__addentry(EMPTYSTRING.join(self.__data))
            self.__state = self.__waiting
        elif ttype == tokenize.STRING:
            self.__data.append(safe_eval(tstring))
        elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                           token.NEWLINE, tokenize.NL]:
            # warn if we see anything else than STRING or whitespace
            print >> sys.stderr, _(
                '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
                ) % {
                'token': tstring,
                'file': self.__curfile,
                'lineno': self.__lineno
                }
            self.__state = self.__waiting 
Example #8
Source File: evaluate.py    From python_autocomplete with MIT License
def __init__(self, model, lstm_layers, lstm_size):
        self.__model = model

        # Initial state
        self._h0 = torch.zeros((lstm_layers, 1, lstm_size), device=device)
        self._c0 = torch.zeros((lstm_layers, 1, lstm_size), device=device)

        # Last line of source code read
        self._last_line = ""

        self._tokens: List[tokenize.TokenInfo] = []

        # Last token, because we need to input that to the model for inference
        self._last_token = 0

        # Last bit of the input string
        self._untokenized = ""

        # For timing
        self.time_add = 0
        self.time_predict = 0
        self.time_check = 0 
Example #9
Source File: reader.py    From py2nb with BSD 3-Clause "New" or "Revised" License
def read(filename):
    """
    Read a regular Python file with special formatting and performance
    preprocessing on it.  The result is a string that conforms to the IPython
    notebook version 3 python script format.
    """
    with open(filename, 'rb') as fin:
        token_gen = _generate_tokens(fin.readline)
        cvt_docstr_gen = convert_toplevel_docstring(token_gen)
        nl_gen = fix_newlines(cvt_docstr_gen)
        out = list(nl_gen)

    formatted = tokenize.untokenize(out).decode('utf-8')
    return fix_empty_lines(formatted)


# =============================================================================
#                                   Helpers
# ============================================================================= 
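Example #9 relies on tokenize.untokenize() to turn the token stream back into source text; a minimal round-trip sketch, independent of the py2nb helpers used above:

import io
import tokenize

src = "def f(x):\n    return x + 1\n"
tokens = list(tokenize.tokenize(io.BytesIO(src.encode("utf-8")).readline))
# untokenize guarantees the result tokenizes back to the same tokens,
# though whitespace is not always preserved byte-for-byte
rebuilt = tokenize.untokenize(tokens).decode("utf-8")
print(rebuilt)
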
Example #10
Source File: reader.py    From py2nb with BSD 3-Clause "New" or "Revised" License
def convert_toplevel_docstring(tokens):
    for token in tokens:
        # For each string
        if token.type == tokenize.STRING:
            text = token.string
            # Must be a docstring
            if text.startswith('"""') or text.startswith("'''"):
                startline, startcol = token.start
                # Starting column MUST be 0
                if startcol == 0:
                    endline, endcol = token.end
                    lines = ['# ' + line
                             for line in text.strip('"\' \n').split('\n')]
                    text = '\n'.join(lines)
                    fmt = '# <markdowncell>\n{0}\n# <codecell>'.format(text)
                    yield TokenInfo(type=tokenize.COMMENT,
                                    start=(startline, startcol),
                                    end=(endline, endcol),
                                    string=fmt,
                                    line='#')
                    # To next token
                    continue
        # Return untouched
        yield token 
Example #11
Source File: reindent.py    From attention-lvcsr with MIT License
def __init__(self, f):
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = [] 
Example #12
Source File: split.py    From flynt with MIT License
def get_chunks(code) -> Generator[Chunk, None, None]:
    g = tokenize.tokenize(io.BytesIO(code.encode("utf-8")).readline)
    chunk = Chunk()

    try:
        for item in g:
            t = PyToken(item)
            reuse = chunk.append(t)

            if chunk.complete:

                yield chunk
                chunk = Chunk()
                if reuse:
                    reuse = chunk.append(t)
                    # assert not reuse
                    if chunk.complete:
                        yield chunk
                        chunk = Chunk()

        yield chunk
    except tokenize.TokenError as e:
        if state.verbose:
            traceback.print_exc()
            print(e) 
Example #13
Source File: aot.py    From learn_python3_spider with MIT License
def indentify(s):
    out = []
    stack = []
    l = ['', s]
    for (tokenType, tokenString, (startRow, startColumn),
         (endRow, endColumn), logicalLine) in tokenize(l.pop):
        if tokenString in ['[', '(', '{']:
            stack.append(tokenString)
        elif tokenString in [']', ')', '}']:
            stack.pop()
        if tokenString == '\0':
            out.append('  '*len(stack))
        else:
            out.append(tokenString)
    return ''.join(out)



###########
# Unjelly #
########### 
Example #14
Source File: recipe-491274.py    From code with MIT License
def format(self):
        """ Parse and send the colored source.
        """
        # store line offsets in self.lines
        self.lines = [0, 0]
        pos = 0
        while 1:
            pos = string.find(self.raw, '\n', pos) + 1
            if not pos: break
            self.lines.append(pos)
        self.lines.append(len(self.raw))

        # parse the source and write it
        self.pos = 0
        text = cStringIO.StringIO(self.raw)
        self.out.write('<pre class="code">\n')
        try:
            tokenize.tokenize(text.readline, self)
        except tokenize.TokenError, ex:
            msg = ex[0]
            line = ex[1][0]
            self.out.write("<h3>ERROR: %s</h3>%s\n" % (
                msg, self.raw[self.lines[line]:])) 
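This recipe targets Python 2, where tokenize.tokenize(readline, tokeneater) accepted a callback. That form is gone in Python 3; an equivalent driver (a sketch, not part of the recipe) iterates the generator and applies the handler itself:

import io
import tokenize

def run_tokeneater(source, tokeneater):
    # call tokeneater(type, string, start, end, line) for every token
    for tok in tokenize.generate_tokens(io.StringIO(source).readline):
        tokeneater(*tok)
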
Example #15
Source File: pythondoc.py    From InternationalizationScript-iOS with MIT License
def handle_token(self, *args):
        # dispatch incoming tokens to the current handler
        if DEBUG > 1:
            print self.handler.im_func.func_name, self.indent,
            print tokenize.tok_name[args[0]], repr(args[1])
        if args[0] == tokenize.DEDENT:
            self.indent = self.indent - 1
            while self.scope and self.scope[-1][0] >= self.indent:
                del self.scope[-1]
                del self.stack[-1]
        self.handler = apply(self.handler, args)
        if args[0] == tokenize.INDENT:
            self.indent = self.indent + 1

    ##
    # (Token handler) Scans for encoding directive. 
Example #16
Source File: pythondoc.py    From InternationalizationScript-iOS with MIT License
def look_for_pythondoc(self, type, token, start, end, line):
        if type == tokenize.COMMENT and string.rstrip(token) == "##":
            # found a comment: set things up for comment processing
            self.comment_start = start
            self.comment = []
            return self.process_comment_body
        else:
            # deal with "bare" subjects
            if token == "def" or token == "class":
                self.subject_indent = self.indent
                self.subject_parens = 0
                self.subject_start = self.comment_start = None
                self.subject = []
                return self.process_subject(type, token, start, end, line)
            return self.look_for_pythondoc

    ##
    # (Token handler) Processes a comment body.  This handler adds
    # comment lines to the current comment. 
Example #17
Source File: pythondoc.py    From InternationalizationScript-iOS with MIT License
def handle_token(self, *args):
        # dispatch incoming tokens to the current handler
        if DEBUG > 1:
            print self.handler.im_func.func_name, self.indent,
            print tokenize.tok_name[args[0]], repr(args[1])
        if args[0] == tokenize.DEDENT:
            self.indent = self.indent - 1
            while self.scope and self.scope[-1][0] >= self.indent:
                del self.scope[-1]
                del self.stack[-1]
        self.handler = apply(self.handler, args)
        if args[0] == tokenize.INDENT:
            self.indent = self.indent + 1

    ##
    # (Token handler) Scans for encoding directive. 
Example #18
Source File: pythondoc.py    From InternationalizationScript-iOS with MIT License
def look_for_pythondoc(self, type, token, start, end, line):
        if type == tokenize.COMMENT and string.rstrip(token) == "##":
            # found a comment: set things up for comment processing
            self.comment_start = start
            self.comment = []
            return self.process_comment_body
        else:
            # deal with "bare" subjects
            if token == "def" or token == "class":
                self.subject_indent = self.indent
                self.subject_parens = 0
                self.subject_start = self.comment_start = None
                self.subject = []
                return self.process_subject(type, token, start, end, line)
            return self.look_for_pythondoc

    ##
    # (Token handler) Processes a comment body.  This handler adds
    # comment lines to the current comment. 
Example #19
Source File: reader.py    From py2nb with BSD 3-Clause "New" or "Revised" License
def fix_newlines(tokens):
    first = True
    curline = 1
    for token in tokens:
        if first:
            first = False
            curline = token.end[0] + 1
        else:
            # Fill NEWLINE token in between
            while curline < token.start[0]:
                yield TokenInfo(type=tokenize.NEWLINE,
                                string='\n',
                                start=(curline, 0),
                                end=(curline, 0),
                                line='\n', )
                curline += 1

            curline = token.end[0] + 1
        yield token 
Example #20
Source File: aot.py    From Safejumper-for-Desktop with GNU General Public License v2.0
def indentify(s):
    out = []
    stack = []
    l = ['', s]
    for (tokenType, tokenString, (startRow, startColumn),
         (endRow, endColumn), logicalLine) in tokenize(l.pop):
        if tokenString in ['[', '(', '{']:
            stack.append(tokenString)
        elif tokenString in [']', ')', '}']:
            stack.pop()
        if tokenString == '\0':
            out.append('  '*len(stack))
        else:
            out.append(tokenString)
    return ''.join(out)



###########
# Unjelly #
########### 
Example #21
Source File: inspect.py    From ironpython2 with Apache License 2.0
def tokeneater(self, type, token, srow_scol, erow_ecol, line):
        srow, scol = srow_scol
        erow, ecol = erow_ecol
        if not self.started:
            # look for the first "def", "class" or "lambda"
            if token in ("def", "class", "lambda"):
                if token == "lambda":
                    self.islambda = True
                self.started = True
            self.passline = True    # skip to the end of the line
        elif type == tokenize.NEWLINE:
            self.passline = False   # stop skipping when a NEWLINE is seen
            self.last = srow
            if self.islambda:       # lambdas always end at the first NEWLINE
                raise EndOfBlock
        elif self.passline:
            pass
        elif type == tokenize.INDENT:
            self.indent = self.indent + 1
            self.passline = True
        elif type == tokenize.DEDENT:
            self.indent = self.indent - 1
            # the end of matching indent/dedent pairs end a block
            # (note that this only works for "def"/"class" blocks,
            #  not e.g. for "if: else:" or "try: finally:" blocks)
            if self.indent <= 0:
                raise EndOfBlock
        elif self.indent == 0 and type not in (tokenize.COMMENT, tokenize.NL):
            # any other token on the same indentation level end the previous
            # block as well, except the pseudo-tokens COMMENT and NL.
            raise EndOfBlock 
Example #22
Source File: inspect.py    From meddle with MIT License
def getblock(lines):
    """Extract the block of code at the top of the given list of lines."""
    blockfinder = BlockFinder()
    try:
        tokenize.tokenize(iter(lines).next, blockfinder.tokeneater)
    except (EndOfBlock, IndentationError):
        pass
    return lines[:blockfinder.last] 
Example #23
Source File: reader.py    From py2nb with BSD 3-Clause "New" or "Revised" License
def _generate_tokens(readline):
        return map(lambda x: TokenInfo(*x), tokenize.generate_tokens(readline)) 
Example #24
Source File: reindent.py    From attention-lvcsr with MIT License
def getline(self):
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize. 
Example #25
Source File: reindent.py    From attention-lvcsr with MIT License
def tokeneater(self, type, token, pos, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        sline, scol = pos
        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((sline, self.level))

# Count number of leading blanks. 
Example #26
Source File: inspect.py    From Fluid-Designer with GNU General Public License v3.0
def getblock(lines):
    """Extract the block of code at the top of the given list of lines."""
    blockfinder = BlockFinder()
    try:
        tokens = tokenize.generate_tokens(iter(lines).__next__)
        for _token in tokens:
            blockfinder.tokeneater(*_token)
    except (EndOfBlock, IndentationError):
        pass
    return lines[:blockfinder.last] 
Example #27
Source File: inspect.py    From Imogen with MIT License
def getblock(lines):
    """Extract the block of code at the top of the given list of lines."""
    blockfinder = BlockFinder()
    try:
        tokens = tokenize.generate_tokens(iter(lines).__next__)
        for _token in tokens:
            blockfinder.tokeneater(*_token)
    except (EndOfBlock, IndentationError):
        pass
    return lines[:blockfinder.last] 
Example #28
Source File: manager.py    From bandit with Apache License 2.0
def _parse_file(self, fname, fdata, new_files_list):
        try:
            # parse the current file
            data = fdata.read()
            lines = data.splitlines()
            self.metrics.begin(fname)
            self.metrics.count_locs(lines)
            if self.ignore_nosec:
                nosec_lines = set()
            else:
                try:
                    fdata.seek(0)
                    if six.PY2:
                        tokens = tokenize.generate_tokens(fdata.readline)
                    else:
                        tokens = tokenize.tokenize(fdata.readline)
                    nosec_lines = set(
                        lineno for toktype, tokval, (lineno, _), _, _ in tokens
                        if toktype == tokenize.COMMENT and
                        '#nosec' in tokval or '# nosec' in tokval)
                except tokenize.TokenError:
                    nosec_lines = set()
            score = self._execute_ast_visitor(fname, data, nosec_lines)
            self.scores.append(score)
            self.metrics.count_issues([score, ])
        except KeyboardInterrupt:
            sys.exit(2)
        except SyntaxError:
            self.skipped.append((fname,
                                 "syntax error while parsing AST from file"))
            new_files_list.remove(fname)
        except Exception as e:
            LOG.error("Exception occurred when executing tests against "
                      "%s. Run \"bandit --debug %s\" to see the full "
                      "traceback.", fname, fname)
            self.skipped.append((fname, 'exception while scanning file'))
            new_files_list.remove(fname)
            LOG.debug("  Exception string: %s", e)
            LOG.debug("  Exception traceback: %s", traceback.format_exc()) 
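The '# nosec' detection above boils down to a scan over COMMENT tokens; a standalone sketch of that idea (the helper name is hypothetical):

import io
import tokenize

def lines_with_marker(source, marker="# nosec"):
    readline = io.BytesIO(source.encode("utf-8")).readline
    try:
        return {tok.start[0] for tok in tokenize.tokenize(readline)
                if tok.type == tokenize.COMMENT and marker in tok.string}
    except tokenize.TokenError:
        return set()
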
Example #29
Source File: tokenizer.py    From python_autocomplete with MIT License
def parse_string(content: str) -> List[ParsedToken]:
    """
    Encode source code
    """
    g = tokenize.tokenize(BytesIO(content.encode('utf-8')).readline)

    return parse(g) 
Example #30
Source File: reindent.py    From attention-lvcsr with MIT License
def write(self, f):
        f.writelines(self.after)

    # Line-getter for tokenize.