#!/usr/bin/env python # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. import argparse import fileinput import os import re import shlex import subprocess import sys import bashate from bashate import messages MESSAGES = messages.MESSAGES def is_continuation(line): return re.search(r'\\\s*$', line) def check_for_do(line, report): if not is_continuation(line): match = re.match(r'^\s*(for|while|until)\s', line) if match: operator = match.group(1).strip() if operator == "for": # "for i in ..." and "for ((" is bash, but # "for (" is likely from an embedded awk script, # so skip it if re.search(r'for \([^\(]', line): return if not re.search(r';\s*do$', line): report.print_error((MESSAGES['E010'].msg % operator), line) def check_if_then(line, report): if not is_continuation(line): if re.search(r'^\s*(el)?if \[', line): if not re.search(r';\s*then$', line): report.print_error(MESSAGES['E011'].msg, line) def check_no_trailing_whitespace(line, report): if re.search(r'[ \t]+$', line): report.print_error(MESSAGES['E001'].msg, line) def check_no_long_lines(line, report, max_line_length): if len(line.rstrip("\r\n")) > max_line_length: report.print_error(MESSAGES['E006'].msg, line) def check_indents(logical_line, report): # this is rather complex to handle argument offset indenting; # primarily done by emacs. If there is an argument, it will try # to line up the following arguments underneath it, e.g. # foobar_cmd bar baz \ # moo boo # Thus the offset in this case might not be a strict multiple of 4 # Find the offset of the first argument of the command (if it has # one) m = re.search(r'^(?P<indent>[ \t]+)?(?P<cmd>\S+)(?P<ws>\s+)(?P<arg>\S+)', logical_line[0]) arg_offset = None if m: arg_offset = len(m.group('indent')) if m.group('indent') else 0 arg_offset += len(m.group('cmd')) + len(m.group('ws')) # go through each line for lineno, line in enumerate(logical_line): m = re.search(r'^(?P<indent>[ \t]+)', line) if m: # no tabs, only spaces if re.search(r'\t', m.group('indent')): report.print_error(MESSAGES['E002'].msg, line) offset = len(m.group('indent')) # the first line and lines without an argument should be # offset by 4 spaces if (lineno == 0) or (arg_offset is None): if (offset % 4) != 0: report.print_error(MESSAGES['E003'].msg, line) else: # other lines are allowed to line up with the first # argument, or be multiple-of 4 spaces if offset != arg_offset and (offset % 4) != 0: report.print_error(MESSAGES['E003'].msg, line) def check_function_decl(line, report): failed = False if line.startswith("function"): if not re.search(r'^function [\w-]* \{$', line): failed = True else: # catch the case without "function", e.g. # things like '^foo() {' if re.search(r'^\s*?\(\)\s*?\{', line): failed = True if failed: report.print_error(MESSAGES['E020'].msg, line) def starts_heredoc(line): # note, watch out for <<EOF and <<'EOF' ; quotes in the # deliminator are part of syntax m = re.search(r"[^<]<<\s*([\'\"]?)(?P<token>\w+)([\'\"]?)", line) return m.group('token') if m else False def end_of_heredoc(line, token): return token and re.search(r"^%s\s*$" % token, line) def check_arithmetic(line, report): if "$[" in line: report.print_error(MESSAGES['E041'].msg, line) def check_bare_arithmetic(line, report): if line.lstrip().startswith("(("): report.print_error(MESSAGES['E043'].msg, line) def check_local_subshell(line, report): # XXX: should we increase the string checking to see if the $( is # anywhere with a string being set? Risk of false positives?x if line.lstrip().startswith('local ') and \ any(s in line for s in ('=$(', '=`', '="$(', '="`')): report.print_error(MESSAGES['E042'].msg, line) def check_hashbang(line, filename, report): # this check only runs on the first line # maybe this should check for shell? if (not filename.endswith(".sh") and not line.startswith("#!") and not os.path.basename(filename).startswith('.')): report.print_error(MESSAGES['E005'].msg, line) def check_conditional_expression(line, report): # We're really starting to push the limits of what we can do without # a complete bash syntax parser here. For example # > [[ $foo =~ " [ " ]] && [[ $bar =~ " ] " ]] # would be valid but mess up a simple regex matcher for "[.*]". # Let alone dealing with multiple-line-spanning etc... # # So we'll KISS and just look for simple, one line, # > if [ $foo =~ "bar" ]; then # type statements, which are the vast majority of typo errors. # # shlex is pretty helpful in getting us something we can walk to # find this pattern. It does however have issues with # unterminated quotes on multi-line strings (e.g.) # # foo="bar <-- we only see this bit in "line" # baz" # # So we're just going to ignore parser failures here and move on. # Possibly in the future we could pull such multi-line strings # into "logical_line" below, and pass that here and have shlex # break that up. try: toks = shlex.shlex(line) toks.wordchars = "[]=~" toks = list(toks) except ValueError: return in_single_bracket = False for tok in toks: if tok == '[': in_single_bracket = True elif tok in ('=~', '<', '>') and in_single_bracket: report.print_error(MESSAGES['E044'].msg, line) elif tok == ']': in_single_bracket = False def check_syntax(filename, report): # run the file through "bash -n" to catch basic syntax errors and # other warnings matches = [] # sample lines we want to match: # foo.sh: line 4: warning: \ # here-document at line 1 delimited by end-of-file (wanted `EOF') # foo.sh: line 9: syntax error: unexpected end of file # foo.sh: line 7: syntax error near unexpected token `}' # # i.e. consistency with ":"'s isn't constant, so just do our # best... r = re.compile( '^(?P<file>.*): line (?P<lineno>[0-9]+): (?P<error>.*)') # we are parsing the error message, so force it to ignore the # system locale so we don't get messages in another language bash_environment = os.environ bash_environment['LC_ALL'] = 'C' proc = subprocess.Popen( ['bash', '-n', filename], stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=bash_environment, universal_newlines=True) outputs = proc.communicate() for line in outputs[1].split('\n'): m = r.match(line) if m: matches.append(m) for m in matches: if 'syntax error' in m.group('error'): msg = '%s: %s' % (MESSAGES['E040'].msg, m.group('error')) report.print_error(msg, filename=filename, filelineno=int(m.group('lineno'))) # Matching output from bash warning about here-documents not # ending. # FIXME: are there other warnings that might come out # with "bash -n"? A quick scan of the source code suggests # no, but there might be other interesting things we could # catch. if 'warning:' in m.group('error'): if 'delimited by end-of-file' in m.group('error'): start = re.match('^.*line (?P<start>[0-9]+).*$', m.group('error')) report.print_error( MESSAGES['E012'].msg % int(start.group('start')), filename=filename, filelineno=int(m.group('lineno'))) class BashateRun(object): def __init__(self): self.error_count = 0 self.error_list = None self.ignore_list = None self.warning_count = 0 self.warning_list = None def register_ignores(self, ignores): if ignores: self.ignore_list = '^(' + '|'.join(ignores.split(',')) + ')' def register_warnings(self, warnings): if warnings: self.warning_list = '^(' + '|'.join(warnings.split(',')) + ')' def register_errors(self, errors): if errors: self.error_list = '^(' + '|'.join(errors.split(',')) + ')' def should_ignore(self, error): return self.ignore_list and re.search(self.ignore_list, error) def should_warn(self, error): # if in the errors list, overrides warning level if self.error_list and re.search(self.error_list, error): return False if messages.is_default_warning(error): return True return self.warning_list and re.search(self.warning_list, error) def print_error(self, error, line='', filename=None, filelineno=None): if self.should_ignore(error): return warn = self.should_warn(error) if not filename: filename = fileinput.filename() if not filelineno: filelineno = fileinput.filelineno() if warn: self.warning_count = self.warning_count + 1 else: self.error_count = self.error_count + 1 self.log_error(error, line, filename, filelineno, warn) def log_error(self, error, line, filename, filelineno, warn=False): # following pycodestyle/pep8 default output format # https://github.com/PyCQA/pycodestyle/blob/master/pycodestyle.py#L108 print("%(filename)s:%(filelineno)s:1: %(error)s" % {'filename': filename, 'filelineno': filelineno, 'warn': "W" if warn else "E", 'error': error.replace(":", "", 1), 'line': line.rstrip('\n')}) def check_files(self, files, verbose, max_line_length=79): logical_line = "" token = False # NOTE(mrodden): magic; replace with proper # report class when necessary report = self for fname in files: # reset world in_heredoc = False in_continuation = False # simple syntax checking, as files can pass style but still cause # syntax errors when you try to run them. check_syntax(fname, report) for line in fileinput.input(fname): if fileinput.isfirstline(): check_hashbang(line, fileinput.filename(), report) if verbose: print("Running bashate on %s" % fileinput.filename()) # Don't run any tests on comment lines (but remember # inside a heredoc this might be part of the syntax of # an embedded script, just ignore that) if line.lstrip().startswith('#') and not in_heredoc: continue # Strip trailing comments. From bash: # # a word beginning with # causes that word and all # remaining characters on that line to be ignored. # ... # A character that, when unquoted, separates # words. One of the following: | & ; ( ) < > space # tab # # for simplicity, we strip inline comments by # matching just '<space>#'. if not in_heredoc: ll_split = line.split(' #', 1) if len(ll_split) > 1: line = ll_split[0].rstrip() # see if this starts a heredoc if not in_heredoc: token = starts_heredoc(line) if token: in_heredoc = True logical_line = [line] continue # see if this starts a continuation if not in_continuation: if is_continuation(line): in_continuation = True logical_line = [line] continue # if we are in a heredoc or continuation, just loop # back and keep buffering the lines into # "logical_line" until the end of the # heredoc/continuation. if in_heredoc: logical_line.append(line) if not end_of_heredoc(line, token): continue else: in_heredoc = False # FIXME: if we want to do something with # heredocs in the future, then the whole thing # is now stored in logical_line. for now, # skip continue elif in_continuation: logical_line.append(line) if is_continuation(line): continue else: in_continuation = False else: logical_line = [line] check_indents(logical_line, report) # at this point, logical_line is an array that holds # the whole continuation. XXX : historically, we've # just handled every line in a continuation # separatley. Stick with what works... for line in logical_line: check_no_trailing_whitespace(line, report) check_no_long_lines(line, report, max_line_length) check_for_do(line, report) check_if_then(line, report) check_function_decl(line, report) check_arithmetic(line, report) check_local_subshell(line, report) check_bare_arithmetic(line, report) check_conditional_expression(line, report) # finished processing the file # last line should always end with a newline if not line.endswith('\n'): report.print_error(MESSAGES['E004'].msg, line) def main(args=None): if args is None: args = sys.argv[1:] parser = argparse.ArgumentParser( description='A bash script style checker') parser.add_argument('files', metavar='file', nargs='*', help='files to scan for errors') parser.add_argument('-i', '--ignore', help='Rules to ignore') parser.add_argument('-w', '--warn', help='Rules to always warn (rather than error)') parser.add_argument('-e', '--error', help='Rules to always error (rather than warn)') parser.add_argument('--max-line-length', default=79, type=int, help='Max line length') parser.add_argument('-v', '--verbose', action='store_true', default=False) parser.add_argument('--version', action='store_true', help='show bashate version number and exit', default=False) parser.add_argument('-s', '--show', action='store_true', default=False) opts = parser.parse_args(args) if opts.version: print("bashate: %s" % bashate.__version__) sys.exit(0) if opts.show: messages.print_messages() sys.exit(0) files = opts.files if not files: parser.print_usage() return 1 run = BashateRun() run.register_ignores(opts.ignore) run.register_warnings(opts.warn) run.register_errors(opts.error) try: run.check_files(files, opts.verbose, opts.max_line_length) except IOError as e: print("bashate: %s" % e) return 1 if run.warning_count > 0: print("%d bashate warning(s) found" % run.warning_count) if run.error_count > 0: print("%d bashate error(s) found" % run.error_count) return 1 return 0 if __name__ == "__main__": sys.exit(main())