Python pygments.lex() Examples

The following are 27 code examples of pygments.lex(), taken from open-source projects. Each example is preceded by its source file, the project it comes from, and that project's license.
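As a baseline before the project examples, here is a minimal sketch of the call itself (illustrative only, not taken from any project below): pygments.lex(code, lexer) takes a source string and a lexer instance and returns an iterator of (token_type, text) tuples.

import pygments
from pygments.lexers import PythonLexer

# Iterate over (token_type, text) pairs for a one-line program.
for ttype, value in pygments.lex('print("hello")\n', PythonLexer()):
    print(ttype, repr(value))
# Typical output starts with something like:
#   Token.Name.Builtin 'print'
#   Token.Punctuation '('
#   ...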
Example #1
Source File: pygments_code_block_directive.py    From rst2pdf with MIT License
def __iter__(self):
        """parse code string and yield "clasified" tokens
        """
        try:
            tokens = self.lex()
        except IOError:
            log.info("Pygments lexer not found, using fallback")
            # TODO: write message to INFO
            yield ('', self.code)
            return

        for ttype, value in self.join(tokens):
            yield (_get_ttype_class(ttype), value)


Example #2
Source File: utils.py    From aries-cloudagent-python with Apache License 2.0
def print_lexer(
    body: str, lexer: Lexer, label: str = None, prefix: str = None, indent: int = None
):
    if COLORIZE:
        prefix_str = prefix + " " if prefix else ""
        if prefix_str or indent:
            prefix_body = prefix_str + " " * (indent or 0)
            lexer.add_filter(PrefixFilter(prefix=prefix_body))
        tokens = list(pygments.lex(body, lexer=lexer))
        if label:
            fmt_label = [("fg:ansimagenta", label)]
            if prefix_str:
                fmt_label.insert(0, ("", prefix_str))
            print_formatted(FormattedText(fmt_label))
        print_formatted(PygmentsTokens(tokens))
    else:
        print_ext(body, label=label, prefix=prefix) 
Example #3
Source File: snippets.py    From diff_cover with Apache License 2.0
def _parse_src(cls, src_contents, src_filename):
        """
        Return a stream of `(token_type, value)` tuples
        parsed from `src_contents` (str)

        Uses `src_filename` to guess the type of file
        so it can highlight syntax correctly.
        """

        # Parse the source into tokens
        try:
            lexer = guess_lexer_for_filename(src_filename, src_contents)
        except ClassNotFound:
            lexer = TextLexer()

        # Ensure that we don't strip newlines from
        # the source file when lexing.
        lexer.stripnl = False

        return pygments.lex(src_contents, lexer) 
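The same pattern works outside the class; a minimal sketch with a made-up filename and contents (guess_lexer_for_filename, TextLexer, and ClassNotFound are the same Pygments names used above):

import pygments
from pygments.lexers import guess_lexer_for_filename, TextLexer
from pygments.util import ClassNotFound

src = "def add(a, b):\n    return a + b\n"
try:
    lexer = guess_lexer_for_filename("example.py", src)  # hypothetical filename
except ClassNotFound:
    lexer = TextLexer()
lexer.stripnl = False  # keep trailing newlines in the token stream
tokens = pygments.lex(src, lexer)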
Example #4
Source File: disassembly.py    From gxf with MIT License
def __init__(self, disassembly, lexer=lexer, msg=None):

        self.lines = []
        if isinstance(disassembly, list):
            self.lines = disassembly
        elif disassembly:
            line = []
            if msg:
                current_function = msg.rsplit(None, 1)[-1][:-1]
            else:
                current_function = None
            with currentfunctiontfilter.current_function(current_function):
                for ttype, value in pygments.lex(disassembly, lexer):
                    if '\n' in value:
                        self.lines.append(DisassemblyLine(line))
                        line = []
                    else:
                        line.append((ttype, value))

        self.linenos = {}
        for i, line in enumerate(self.lines):
            self.linenos[line.address] = line, i

        self.lexer = lexer
        self.msg = msg 
Example #5
Source File: code_manager.py    From stata_kernel with GNU General Public License v3.0
def tokenize_first_pass(self, code):
        """Tokenize input code for Comments and Delimit blocks

        Args:
            code (str):
                Input string. Should use `\\n` for end of lines.

        Return:
            (List[Tuple[Token, str]]):
                List of token tuples. The only token types currently used in the
                lexer are:
                - Text (plain text)
                - Comment.Single (// and *)
                - Comment.Special (///)
                - Comment.Multiline (/* */)
                - Keyword.Namespace (code inside #delimit ; block)
                - Keyword.Reserved (; delimiter)
        """
        comment_lexer = CommentAndDelimitLexer(stripall=False, stripnl=False)
        return [x for x in lex(code, comment_lexer)] 
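CommentAndDelimitLexer is specific to stata_kernel, but the list comprehension above just materializes lex()'s stream of (token_type, string) tuples, which has the same shape for any lexer. A rough illustration with a stock C lexer standing in (exact token splitting depends on the lexer):

from pygments import lex
from pygments.lexers import CLexer
from pygments.token import Comment

tokens = list(lex("int x; // note\n", CLexer()))
comments = [value for ttype, value in tokens if ttype in Comment]
# comments is roughly ['// note\n']; "ttype in Comment" matches all Comment.* subtypes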
Example #6
Source File: code_manager.py    From stata_kernel with GNU General Public License v3.0
def tokenize_second_pass(self, code):
        """Tokenize clean code for syntactic blocks

        Args:
            code (str):
                Input string. Should have `\\n` as the delimiter. Should have no
                comments. Should use `\\n` for end of lines.

        Return:
            (List[Tuple[Token, str]]):
                List of token tuples. Some of the token types used in the
                lexer are:
                - Text (plain text)
                - Comment.Single (// and *)
                - Comment.Special (///)
                - Comment.Multiline (/* */)
                - Keyword.Namespace (code inside #delimit ; block)
                - Keyword.Reserved (; delimiter)
        """
        block_lexer = StataLexer(stripall=False, stripnl=False)
        return [x for x in lex(code, block_lexer)] 
Example #7
Source File: printers.py    From fuzzowski with GNU General Public License v2.0
def print_packets(path: list, nodes: dict) -> None:
    tokens = []
    for e in path[:-1]:
        node = nodes[e.dst]
        p = node.render()
        line = '{} = {}'.format(node.name.replace('-', '_'), repr(p))
        tokens.extend(list(pygments.lex(line, lexer=Python3Lexer())))

    # p = self.fuzz_node.render()
    node = nodes[path[-1].dst]
    p = node.render()
    line = '{} = {}'.format(node.name.replace('-', '_'), repr(p))

    print(pygments.highlight(line, Python3Lexer(), Terminal256Formatter(style='rrt')))

    # tokens.extend(list(pygments.lex(line, lexer=Python3Lexer())))
    # style = style_from_pygments_cls(get_style_by_name('colorful'))
    # print_formatted_text(PygmentsTokens(tokens), style=style)


# --------------------------------------------------------------- # 
Example #8
Source File: code_analyzer.py    From aws-extender with MIT License
def __iter__(self):
        """Parse self.code and yield "classified" tokens.
        """
        if self.lexer is None:
            yield ([], self.code)
            return
        tokens = pygments.lex(self.code, self.lexer)
        for tokentype, value in self.merge(tokens):
            if self.tokennames == 'long': # long CSS class args
                classes = str(tokentype).lower().split('.')
            else: # short CSS class args
                classes = [_get_ttype_class(tokentype)]
            classes = [cls for cls in classes if cls not in unstyled_tokens]
            yield (classes, value) 
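For reference, the 'long' branch above derives CSS classes directly from the dotted token-type name:

from pygments.token import Token

print(str(Token.Literal.String.Double).lower().split('.'))
# ['token', 'literal', 'string', 'double']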
Example #9
Source File: pygments_code_block_directive.py    From rst2pdf with MIT License
def lex(self):
        # Get lexer for language (use text as fallback)
        try:
            if self.language and str(self.language).lower() != 'none':
                lexer = get_lexer_by_name(self.language.lower(), **self.custom_args)
            else:
                lexer = get_lexer_by_name('text', **self.custom_args)
        except ValueError:
            log.info("no pygments lexer for %s, using 'text'" % self.language)
            # what happens if pygments isn't present?
            lexer = get_lexer_by_name('text')
        return pygments.lex(self.code, lexer) 
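A note on the except ValueError above: get_lexer_by_name() raises pygments.util.ClassNotFound for unknown names, and ClassNotFound subclasses ValueError, so the fallback branch is reached. A quick stand-alone check:

from pygments.lexers import get_lexer_by_name
from pygments.util import ClassNotFound

assert issubclass(ClassNotFound, ValueError)
lexer = get_lexer_by_name('text')  # the plain-text fallback lexer used above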
Example #10
Source File: code_analyzer.py    From aws-builders-fair-projects with Apache License 2.0
def __iter__(self):
        """Parse self.code and yield "classified" tokens.
        """
        if self.lexer is None:
            yield ([], self.code)
            return
        tokens = pygments.lex(self.code, self.lexer)
        for tokentype, value in self.merge(tokens):
            if self.tokennames == 'long': # long CSS class args
                classes = str(tokentype).lower().split('.')
            else: # short CSS class args
                classes = [_get_ttype_class(tokentype)]
            classes = [cls for cls in classes if cls not in unstyled_tokens]
            yield (classes, value) 
Example #11
Source File: printers.py    From fuzzowski with GNU General Public License v2.0
def print_poc(target: Target, path: list,
              receive_data_after_each_request, receive_data_after_fuzz) -> None:
    tokens = []

    exploit_code = get_exploit_code(target, path, receive_data_after_each_request, receive_data_after_fuzz)
    print(pygments.highlight(exploit_code, Python3Lexer(), Terminal256Formatter(style='rrt')))

    # tokens.extend(list(pygments.lex(exploit_code, lexer=Python3Lexer())))
    # print_formatted_text(PygmentsTokens(tokens))

# --------------------------------------------------------------- # 
Example #12
Source File: printers.py    From fuzzowski with GNU General Public License v2.0
def print_python(path: list) -> None:
    tokens = []
    block_code = path_to_python(path)
    print(pygments.highlight(block_code, Python3Lexer(), Terminal256Formatter(style='rrt')))

    # tokens.extend(list(pygments.lex(block_code, lexer=Python3Lexer())))
    # print_formatted_text(PygmentsTokens(tokens))

# --------------------------------------------------------------- # 
Example #13
Source File: lexer.py    From suplemon with MIT License
def lex(self, code, lex):
        """Return tokenified code.

        Return a list of tuples (scope, word) where word is the word to be
        printed and scope the scope name representing the context.

        :param str code: Code to tokenify.
        :param lex: Lexer to use.
        :return:
        """
        if lex is None:
            if not type(code) is str:
                # if no suitable lexer is found, return decoded code
                code = code.decode("utf-8")
            return (("global", code),)

        words = pygments.lex(code, lex)

        scopes = []
        for word in words:
            token = word[0]
            scope = "global"

            if token in self.token_map.keys():
                scope = self.token_map[token]

            scopes.append((scope, word[1]))
        return scopes 
Example #14
Source File: test__meta.py    From libnl with GNU Lesser General Public License v2.1
def test_print_hunter():
    """Verify that there are no print statements in the codebase."""
    root_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
    assert 'tests' in os.listdir(root_directory)
    generator = (os.path.join(r, s) for r, d, f in os.walk(root_directory) if '.egg/' not in r and '/.tox/' not in r
                 for s in f if s.endswith('.py') and not s.startswith('example_'))
    regex_print = re.compile(r'^(.*)(?<!\w)print(\(|\s)(.*)$', re.MULTILINE)

    # Find all potential prints in Python files. May or may not be in strings.
    potential_prints = set()
    for file_path in generator:
        with open(file_path) as f:
            for line in f:
                if regex_print.search(line):
                    potential_prints.add(file_path)
                    break
    if not potential_prints:
        return

    # Perform lexical analysis on the source code and find all valid print statements/function calls.
    current_line = list()
    actual_prints = dict()
    for file_path in potential_prints:
        with open(file_path) as f:
            code = f.read(52428800)  # Up to 50 MiB.
        for token, code_piece in lex(code, get_lexer_by_name('Python')):
            if code_piece == '\n':
                current_line = list()  # References new list, doesn't necessarily remove old list.
                continue
            current_line.append(code_piece)
            if (str(token), code_piece) != ('Token.Keyword', 'print'):
                continue
            # If this is reached, there is a print statement in the library!
            if file_path not in actual_prints:
                actual_prints[file_path] = list()
            actual_prints[file_path].append(current_line)  # Keeps reference to current list() alive.
    actual_prints = dict((f, [''.join(l) for l in lst]) for f, lst in actual_prints.items())
    assert not actual_prints 
Example #15
Source File: pygments-tokens.py    From python-prompt-toolkit with BSD 3-Clause "New" or "Revised" License
def main():
    # Printing a manually constructed list of (Token, text) tuples.
    text = [
        (Token.Keyword, "print"),
        (Token.Punctuation, "("),
        (Token.Literal.String.Double, '"'),
        (Token.Literal.String.Double, "hello"),
        (Token.Literal.String.Double, '"'),
        (Token.Punctuation, ")"),
        (Token.Text, "\n"),
    ]

    print_formatted_text(PygmentsTokens(text))

    # Printing the output of a pygments lexer.
    tokens = list(pygments.lex('print("Hello")', lexer=PythonLexer()))
    print_formatted_text(PygmentsTokens(tokens))

    # With a custom style.
    style = Style.from_dict(
        {
            "pygments.keyword": "underline",
            "pygments.literal.string": "bg:#00ff00 #ffffff",
        }
    )
    print_formatted_text(PygmentsTokens(tokens), style=style) 
Example #16
Source File: test_basic_api.py    From pygments with BSD 2-Clause "Simplified" License
def test_bare_class_handler():
    from pygments.formatters import HtmlFormatter
    from pygments.lexers import PythonLexer
    try:
        lex('test\n', PythonLexer)
    except TypeError as e:
        assert 'lex() argument must be a lexer instance' in str(e)
    else:
        assert False, 'nothing raised'
    try:
        format([], HtmlFormatter)
    except TypeError as e:
        assert 'format() argument must be a formatter instance' in str(e)
    else:
        assert False, 'nothing raised' 
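The behavior this test pins down is easy to reproduce on its own: lex() (and format()) expect instances, not classes.

import pygments
from pygments.lexers import PythonLexer

tokens = list(pygments.lex('test\n', PythonLexer()))  # fine: a lexer instance
# pygments.lex('test\n', PythonLexer)  # raises TypeError: lex() argument must be a lexer instance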
Example #17
Source File: code_analyzer.py    From faces with GNU General Public License v2.0
def __iter__(self):
        """Parse self.code and yield "classified" tokens.
        """
        if self.lexer is None:
            yield ([], self.code)
            return
        tokens = pygments.lex(self.code, self.lexer)
        for tokentype, value in self.merge(tokens):
            if self.tokennames == 'long': # long CSS class args
                classes = str(tokentype).lower().split('.')
            else: # short CSS class args
                classes = [_get_ttype_class(tokentype)]
            classes = [cls for cls in classes if cls not in unstyled_tokens]
            yield (classes, value) 
Example #18
Source File: code_analyzer.py    From blackmamba with MIT License
def __iter__(self):
        """Parse self.code and yield "classified" tokens.
        """
        if self.lexer is None:
            yield ([], self.code)
            return
        tokens = pygments.lex(self.code, self.lexer)
        for tokentype, value in self.merge(tokens):
            if self.tokennames == 'long': # long CSS class args
                classes = str(tokentype).lower().split('.')
            else: # short CSS class args
                classes = [_get_ttype_class(tokentype)]
            classes = [cls for cls in classes if cls not in unstyled_tokens]
            yield (classes, value) 
Example #19
Source File: code_analyzer.py    From AWS-Transit-Gateway-Demo-MultiAccount with MIT License
def __iter__(self):
        """Parse self.code and yield "classified" tokens.
        """
        if self.lexer is None:
            yield ([], self.code)
            return
        tokens = pygments.lex(self.code, self.lexer)
        for tokentype, value in self.merge(tokens):
            if self.tokennames == 'long': # long CSS class args
                classes = str(tokentype).lower().split('.')
            else: # short CSS class args
                classes = [_get_ttype_class(tokentype)]
            classes = [cls for cls in classes if cls not in unstyled_tokens]
            yield (classes, value) 
Example #20
Source File: code_analyzer.py    From AWS-Transit-Gateway-Demo-MultiAccount with MIT License
def __iter__(self):
        """Parse self.code and yield "classified" tokens.
        """
        if self.lexer is None:
            yield ([], self.code)
            return
        tokens = pygments.lex(self.code, self.lexer)
        for tokentype, value in self.merge(tokens):
            if self.tokennames == 'long': # long CSS class args
                classes = str(tokentype).lower().split('.')
            else: # short CSS class args
                classes = [_get_ttype_class(tokentype)]
            classes = [cls for cls in classes if cls not in unstyled_tokens]
            yield (classes, value) 
Example #21
Source File: code_analyzer.py    From cadquery-freecad-module with GNU Lesser General Public License v3.0
def __iter__(self):
        """Parse self.code and yield "classified" tokens.
        """
        if self.lexer is None:
            yield ([], self.code)
            return
        tokens = pygments.lex(self.code, self.lexer)
        for tokentype, value in self.merge(tokens):
            if self.tokennames == 'long': # long CSS class args
                classes = str(tokentype).lower().split('.')
            else: # short CSS class args
                classes = [_get_ttype_class(tokentype)]
            classes = [cls for cls in classes if cls not in unstyled_tokens]
            yield (classes, value) 
Example #22
Source File: code_analyzer.py    From bash-lambda-layer with MIT License
def __iter__(self):
        """Parse self.code and yield "classified" tokens.
        """
        if self.lexer is None:
            yield ([], self.code)
            return
        tokens = pygments.lex(self.code, self.lexer)
        for tokentype, value in self.merge(tokens):
            if self.tokennames == 'long': # long CSS class args
                classes = str(tokentype).lower().split('.')
            else: # short CSS class args
                classes = [_get_ttype_class(tokentype)]
            classes = [cls for cls in classes if cls not in unstyled_tokens]
            yield (classes, value) 
Example #23
Source File: code_analyzer.py    From deepWordBug with Apache License 2.0
def __iter__(self):
        """Parse self.code and yield "classified" tokens.
        """
        if self.lexer is None:
            yield ([], self.code)
            return
        tokens = pygments.lex(self.code, self.lexer)
        for tokentype, value in self.merge(tokens):
            if self.tokennames == 'long': # long CSS class args
                classes = str(tokentype).lower().split('.')
            else: # short CSS class args
                classes = [_get_ttype_class(tokentype)]
            classes = [cls for cls in classes if cls not in unstyled_tokens]
            yield (classes, value) 
Example #24
Source File: code_analyzer.py    From faces with GNU General Public License v2.0
def __iter__(self):
        """Parse self.code and yield "classified" tokens.
        """
        if self.lexer is None:
            yield ([], self.code)
            return
        tokens = pygments.lex(self.code, self.lexer)
        for tokentype, value in self.merge(tokens):
            if self.tokennames == 'long': # long CSS class args
                classes = str(tokentype).lower().split('.')
            else: # short CSS class args
                classes = [_get_ttype_class(tokentype)]
            classes = [cls for cls in classes if cls not in unstyled_tokens]
            yield (classes, value) 
Example #25
Source File: test__meta.py    From libnl with GNU Lesser General Public License v2.1
def test_todo_issue_validator():
    """Verify that each T.O.D.O is associated with an open GitHub issue."""
    root_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
    assert 'tests' in os.listdir(root_directory)
    generator = (os.path.join(r, s) for r, d, f in os.walk(root_directory)
                 if '.tox' not in r
                 for s in f
                 if s.endswith('.py') and not s.startswith('example_'))
    regex_todo = re.compile(r'^(.*)(?<!\w)(TODO|FIXME)(?!\w)(.*)$', re.IGNORECASE | re.MULTILINE)

    # Find all potential TODOs in Python files. May or may not be in comments/docstrings.
    potential_todos = set()
    for file_path in generator:
        with open(file_path) as f:
            for line in f:
                if regex_todo.search(line):
                    potential_todos.add(file_path)
                    break
    if not potential_todos:
        return

    # Get all open issues.
    repo_slug = os.environ['TRAVIS_REPO_SLUG']
    assert re.match(r'^[a-zA-Z0-9_-]+/[a-zA-Z0-9_-]+$', repo_slug)
    response = urlopen('https://api.github.com/repos/{0}/issues'.format(repo_slug))
    raw_data = response.read().decode('utf-8')
    parsed_data = json.loads(raw_data)
    open_issues = set(['issues/{0:d}'.format(int(i.get('number'))) for i in parsed_data if i.get('state') == 'open'])

    # Perform lexical analysis on the source code and find all docstrings and comments with TODOs.
    todos_with_no_issues = dict()
    for file_path in potential_todos:
        with open(file_path) as f:
            code = f.read(52428800)  # Up to 50 MiB.
        for token, code_piece in lex(code, get_lexer_by_name('Python')):
            if str(token) not in ('Token.Comment', 'Token.Literal.String.Doc'):
                continue
            if not regex_todo.search(code_piece):
                continue
            code_line = ''.join(b for a in regex_todo.findall(code_piece) for b in a)
            has_issue = bool([i for i in open_issues if i in code_line])
            if has_issue:
                continue  # This t.o.d.o has an open issue, skipping.
            # If this is reached, there is a t.o.d.o without an open issue!
            if file_path not in todos_with_no_issues:
                todos_with_no_issues[file_path] = list()
            todos_with_no_issues[file_path].append(code_line)
    assert not todos_with_no_issues 
Example #26
Source File: bib_manager.py    From bibmanager with MIT License
def display_bibs(labels, bibs, meta=False):
  r"""
  Display a list of bib entries on screen with flying colors.

  Parameters
  ----------
  labels: List of Strings
      Header labels to show above each Bib() entry.
  bibs: List of Bib() objects
      BibTeX entries to display.
  meta: Bool
      If True, also display the meta-information.

  Examples
  --------
  >>> import bibmanager.bib_manager as bm
  >>> e1 = '''@Misc{JonesEtal2001scipy,
         author = {Eric Jones and Travis Oliphant and Pearu Peterson},
         title  = {{SciPy}: Open source scientific tools for {Python}},
         year   = {2001},
       }'''
  >>> e2 = '''@Misc{Jones2001,
         author = {Eric Jones and Travis Oliphant and Pearu Peterson},
         title  = {SciPy: Open source scientific tools for Python},
         year   = {2001},
       }'''
  >>> bibs = [bm.Bib(e1), bm.Bib(e2)]
  >>> bm.display_bibs(["DATABASE:\n", "NEW:\n"], bibs)
  ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
  DATABASE:
  @Misc{JonesEtal2001scipy,
         author = {Eric Jones and Travis Oliphant and Pearu Peterson},
         title  = {{SciPy}: Open source scientific tools for {Python}},
         year   = {2001},
       }

  NEW:
  @Misc{Jones2001,
         author = {Eric Jones and Travis Oliphant and Pearu Peterson},
         title  = {SciPy: Open source scientific tools for Python},
         year   = {2001},
       }
  """
  style = prompt_toolkit.styles.style_from_pygments_cls(
              pygments.styles.get_style_by_name(cm.get('style')))
  if labels is None:
      labels = ["" for _ in bibs]
  tokens = [(Token.Comment, u.BANNER)]
  for label,bib in zip(labels, bibs):
      tokens += [(Token.Text, label)]
      if meta:
          tokens += [(Token.Comment, bib.meta())]
      tokens += list(pygments.lex(bib.content, lexer=BibTeXLexer()))
      tokens += [(Token.Text, "\n")]
  print_formatted_text(PygmentsTokens(tokens), end="", style=style,
      output=create_output(sys.stdout)) 
Example #27
Source File: __init__.py    From autostack with MIT License
def print_code_block(code_block):
    '''
    Prints a code block from Stack Overflow with syntax highlighting.

    On Stack Overflow, the code in a HTML 'code' element contains
    a 'span' element for each token. Because of this, it's necessary
    to grab each of the 'code' element's 'span' elements' values to get
    the actual code.

    Parameter {bs4.Tag} code_block: 'soup' of a HTML
    'code' element from a Stack Overflow post.
    '''

    token_colors = {
        'Token.Keyword': 'blue',
        'Token.Name.Builtin.Pseudo': 'blue',
        'Token.Literal.Number.Integer': 'green',
        'Token.Literal.Number.Float': 'green',
        'Token.Comment.Single': 'green',
        'Token.Comment.Hashbang': 'green',
        'Token.Literal.String.Single': 'yellow',
        'Token.Literal.String.Double': 'yellow',
        'Token.Literal.String.Doc': 'yellow'
    }

    print('')

    # Store the code's text.
    code = get_src_code(code_block)

    # Loop over code, and highlight.
    for token, content in pygments.lex(code, PythonLexer()):
        try:
            print(
                colored(content, token_colors[str(token)]),
                end=''
            )
        except KeyError:
            print(
                content,
                end=''
            )

    print('')
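The keys in token_colors above are simply the string form of Pygments token types, which is why the lookup uses str(token):

from pygments.token import Token

print(str(Token.Literal.String.Double))
# Token.Literal.String.Double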