"""Tests looking for TODOs and prints."""

import json
import os
import re

try:
    from urllib.request import urlopen
except ImportError:
    from urllib import urlopen

import pytest
from pygments import lex
from pygments.lexers import get_lexer_by_name

# Upper bound on how much of each source file is handed to the lexer (50 Mi characters).
_MAX_SOURCE_CHARS = 52428800

# Page size requested from the GitHub API; 100 is the documented maximum.
_ISSUES_PER_PAGE = 100

# Depending on the Pygments version (Python 2 vs. Python 3 lexer) the word "print" is emitted either as a keyword
# or as a builtin name, so both token types have to be accepted.
_PRINT_TOKEN_TYPES = ('Token.Keyword', 'Token.Name.Builtin')


def _project_root():
    """Return the absolute path of the directory one level above this file.

    Asserts that the directory holds a ``tests`` entry, as a sanity check that the right directory was found.
    """
    root_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
    assert 'tests' in os.listdir(root_directory)
    return root_directory


def _iter_python_files(root_directory):
    """Yield paths of the ``.py`` files below ``root_directory`` that should be inspected.

    Directories named ``.tox`` or ending in ``.egg`` are never entered and files starting with ``example_`` are
    skipped. Filtering is done on directory names below the root (not on the absolute path), so a ``.tox``
    substring in a parent directory or a different path separator cannot change the result.

    :param str root_directory: Directory to walk.
    """
    for dir_path, dir_names, file_names in os.walk(root_directory):
        # Pruning the list in place stops os.walk from descending into those directories.
        dir_names[:] = [d for d in dir_names if d != '.tox' and not d.endswith('.egg')]
        for file_name in file_names:
            if file_name.endswith('.py') and not file_name.startswith('example_'):
                yield os.path.join(dir_path, file_name)


def _files_matching(file_paths, regex):
    """Return the set of paths whose file has at least one line matched by ``regex``.

    This is only a cheap pre-filter: the match may be inside a string, comment or docstring.

    :param file_paths: Iterable of file paths to read.
    :param regex: Compiled regular expression searched against every line.
    """
    matching = set()
    for file_path in file_paths:
        with open(file_path) as handle:
            if any(regex.search(line) for line in handle):
                matching.add(file_path)
    return matching


def _read_source(file_path):
    """Return the beginning (at most ``_MAX_SOURCE_CHARS``) of the file at ``file_path``."""
    with open(file_path) as handle:
        return handle.read(_MAX_SOURCE_CHARS)


def _fetch_open_issue_numbers(repo_slug):
    """Return the set of open issue numbers (ints) of a GitHub repository.

    Every result page is requested in turn; reading only the first page would miss open issues once the
    repository has more of them than fit on one page.

    :param str repo_slug: Repository in ``owner/name`` form.
    """
    url_template = 'https://api.github.com/repos/{0}/issues?state=open&per_page={1:d}&page={2:d}'
    open_numbers = set()
    page = 1
    while True:
        response = urlopen(url_template.format(repo_slug, _ISSUES_PER_PAGE, page))
        try:
            parsed_data = json.loads(response.read().decode('utf-8'))
        finally:
            response.close()  # Do not leak the connection, even when reading or decoding fails.
        assert isinstance(parsed_data, list), parsed_data  # An error payload is a JSON object, not a list.
        open_numbers.update(int(i.get('number')) for i in parsed_data if i.get('state') == 'open')
        if len(parsed_data) < _ISSUES_PER_PAGE:
            return open_numbers  # A short (or empty) page is the last one.
        page += 1


@pytest.mark.skipif('"TRAVIS_REPO_SLUG" not in os.environ')
def test_todo_issue_validator():
    """Verify that each T.O.D.O is associated with an open GitHub issue."""
    root_directory = _project_root()
    regex_todo = re.compile(r'^(.*)(?<!\w)(TODO|FIXME)(?!\w)(.*)$', re.IGNORECASE | re.MULTILINE)
    # Captures the whole issue number, so that "issues/12" is never mistaken for a reference to issue 1.
    regex_issue = re.compile(r'issues/(\d+)')

    # Find all potential TODOs in Python files. May or may not be in comments/docstrings.
    potential_todos = _files_matching(_iter_python_files(root_directory), regex_todo)
    if not potential_todos:
        return

    # Get all open issues. The slug is validated because it is interpolated into the request URL.
    repo_slug = os.environ['TRAVIS_REPO_SLUG']
    assert re.match(r'^[a-zA-Z0-9_-]+/[a-zA-Z0-9_.-]+$', repo_slug)
    open_issues = _fetch_open_issue_numbers(repo_slug)

    # Perform lexical analysis on the source code and find all docstrings and comments with TODOs.
    lexer = get_lexer_by_name('Python')
    todos_with_no_issues = {}
    for file_path in potential_todos:
        for token, code_piece in lex(_read_source(file_path), lexer):
            token_name = str(token)
            # Comments may be reported as a sub-type (e.g. Token.Comment.Single), hence the prefix match.
            if not (token_name.startswith('Token.Comment') or token_name == 'Token.Literal.String.Doc'):
                continue
            matches = regex_todo.findall(code_piece)
            if not matches:
                continue
            code_line = ''.join(part for match in matches for part in match)
            referenced = set(int(number) for number in regex_issue.findall(code_line))
            if referenced & open_issues:
                continue  # This t.o.d.o has an open issue, skipping.
            # If this is reached, there is a t.o.d.o without an open issue!
            todos_with_no_issues.setdefault(file_path, []).append(code_line)
    assert not todos_with_no_issues


def test_print_hunter():
    """Verify that there are no print statements in the codebase."""
    root_directory = _project_root()
    regex_print = re.compile(r'^(.*)(?<!\w)print(\(|\s)(.*)$', re.MULTILINE)

    # Find all potential prints in Python files. May or may not be in strings.
    potential_prints = _files_matching(_iter_python_files(root_directory), regex_print)
    if not potential_prints:
        return

    # Perform lexical analysis on the source code and find all valid print statements/function calls.
    lexer = get_lexer_by_name('Python')
    actual_prints = {}
    for file_path in potential_prints:
        current_line = []  # Started afresh for every file so that no tokens leak from the previous one.
        for token, code_piece in lex(_read_source(file_path), lexer):
            if code_piece == '\n':
                current_line = []  # References new list, doesn't necessarily remove old list.
                continue
            current_line.append(code_piece)
            if code_piece != 'print' or str(token) not in _PRINT_TOKEN_TYPES:
                continue
            # If this is reached, there is a print statement in the library!
            offending_lines = actual_prints.setdefault(file_path, [])
            # The stored list keeps growing until the end of the line, so the full line is reported. A line with
            # several prints is stored only once.
            if not offending_lines or offending_lines[-1] is not current_line:
                offending_lines.append(current_line)
    actual_prints = dict((f, [''.join(l) for l in lst]) for f, lst in actual_prints.items())
    assert not actual_prints