# HG changeset patch # User Matt Mackall # Date 2011-10-17 01:26:20 # Node ID aeeb2afcdc25aa0ae40ae00394f07ecd2598624c # Parent 52bc0c4daaf9ac97cb7b4018a7c79aadeb015c6a check-code: support multiline matches like try/except/finally - match one pattern at a time against entire file - find line containing match - sort matches by line number diff --git a/contrib/check-code.py b/contrib/check-code.py --- a/contrib/check-code.py +++ b/contrib/check-code.py @@ -13,7 +13,7 @@ import optparse def repquote(m): t = re.sub(r"\w", "x", m.group('text')) - t = re.sub(r"[^\sx]", "o", t) + t = re.sub(r"[^\s\nx]", "o", t) return m.group('quote') + t + m.group('quote') def reppython(m): @@ -44,30 +44,30 @@ def rephere(m): testpats = [ [ (r'(pushd|popd)', "don't use 'pushd' or 'popd', use 'cd'"), - (r'\W\$?\(\([^\)]*\)\)', "don't use (()) or $(()), use 'expr'"), + (r'\W\$?\(\([^\)\n]*\)\)', "don't use (()) or $(()), use 'expr'"), (r'^function', "don't use 'function', use old style"), (r'grep.*-q', "don't use 'grep -q', redirect to /dev/null"), (r'echo.*\\n', "don't use 'echo \\n', use printf"), (r'echo -n', "don't use 'echo -n', use printf"), (r'^diff.*-\w*N', "don't use 'diff -N'"), - (r'(^| )wc[^|]*$', "filter wc output"), + (r'(^| )wc[^|\n]*$', "filter wc output"), (r'head -c', "don't use 'head -c', use 'dd'"), (r'ls.*-\w*R', "don't use 'ls -R', use 'find'"), (r'printf.*\\\d\d\d', "don't use 'printf \NNN', use Python"), (r'printf.*\\x', "don't use printf \\x, use Python"), (r'\$\(.*\)', "don't use $(expr), use `expr`"), (r'rm -rf \*', "don't use naked rm -rf, target a directory"), - (r'(^|\|\s*)grep (-\w\s+)*[^|]*[(|]\w', + (r'(^|\|\s*)grep (-\w\s+)*[^|\n]*[(|]\w', "use egrep for extended grep syntax"), (r'/bin/', "don't use explicit paths for tools"), (r'\$PWD', "don't use $PWD, use `pwd`"), (r'[^\n]\Z', "no trailing newline"), (r'export.*=', "don't export and assign at once"), - ('^([^"\']|("[^"]*")|(\'[^\']*\'))*\\^', "^ must be quoted"), + ('^([^"\'\n]|("[^"\n]*")|(\'[^\'\n]*\'))*\\^', "^ must be quoted"), (r'^source\b', "don't use 'source', use '.'"), (r'touch -d', "don't use 'touch -d', use 'touch -t' instead"), - (r'ls\s+[^|-]+\s+-', "options to 'ls' must come before filenames"), - (r'[^>]>\s*\$HGRCPATH', "don't overwrite $HGRCPATH, append to it"), + (r'ls\s+[^|\n-]+\s+-', "options to 'ls' must come before filenames"), + (r'[^>\n]>\s*\$HGRCPATH', "don't overwrite $HGRCPATH, append to it"), (r'stop\(\)', "don't use 'stop' as a shell function name"), ], # warnings @@ -83,7 +83,7 @@ uprefix = r"^ \$ " uprefixc = r"^ > " utestpats = [ [ - (r'^(\S| $ ).*(\S\s+|^\s+)\n', "trailing whitespace on non-output"), + (r'^(\S| $ ).*(\S[ \t]+|^[ \t]+)\n', "trailing whitespace on non-output"), (uprefix + r'.*\|\s*sed', "use regex test output patterns instead of sed"), (uprefix + r'(true|exit 0)', "explicit zero exit unnecessary"), (uprefix + r'.*\$\?', "explicit exit code checks unnecessary"), @@ -121,16 +121,18 @@ pypats = [ (r'\S;\s*\n', "semicolon"), (r'\w,\w', "missing whitespace after ,"), (r'\w[+/*\-<>]\w', "missing whitespace in expression"), - (r'^\s+\w+=\w+[^,)]$', "missing whitespace in assignment"), + (r'^\s+\w+=\w+[^,)\n]$', "missing whitespace in assignment"), + (r'(?m)(\s+)try:\n((?:\n|\1\s.*\n)+?)\1except.*?:\n' + r'((?:\n|\1\s.*\n)+?)\1finally:', 'no try/except/finally in Py2.4'), (r'.{85}', "line too long"), (r'[^\n]\Z', "no trailing newline"), - (r'(\S\s+|^\s+)\n', "trailing whitespace"), -# (r'^\s+[^_ ][^_. ]+_[^_]+\s*=', "don't use underbars in identifiers"), + (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"), +# (r'^\s+[^_ \n][^_. \n]+_[^_\n]+\s*=', "don't use underbars in identifiers"), # (r'\w*[a-z][A-Z]\w*\s*=', "don't use camelcase in identifiers"), - (r'^\s*(if|while|def|class|except|try)\s[^[]*:\s*[^\]#\s]+', + (r'^\s*(if|while|def|class|except|try)\s[^[\n]*:\s*[^\\n]#\s]+', "linebreak after :"), - (r'class\s[^( ]+:', "old-style class, use class foo(object)"), - (r'class\s[^( ]+\(\):', + (r'class\s[^( \n]+:', "old-style class, use class foo(object)"), + (r'class\s[^( \n]+\(\):', "class foo() not available in Python 2.4, use class foo(object)"), (r'\b(%s)\(' % '|'.join(keyword.kwlist), "Python keyword is not a function"), @@ -152,7 +154,7 @@ pypats = [ (r'if\s.*\selse', "if ... else form not available in Python 2.4"), (r'^\s*(%s)\s\s' % '|'.join(keyword.kwlist), "gratuitous whitespace after Python keyword"), - (r'([\(\[]\s\S)|(\S\s[\)\]])', "gratuitous whitespace in () or []"), + (r'([\(\[][ \t]\S)|(\S[ \t][\)\]])', "gratuitous whitespace in () or []"), # (r'\s\s=', "gratuitous whitespace before ="), (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=)\S', "missing whitespace around operator"), @@ -206,7 +208,7 @@ cpats = [ (r'//', "don't use //-style comments"), (r'^ ', "don't use spaces to indent"), (r'\S\t', "don't use tabs except for indent"), - (r'(\S\s+|^\s+)\n', "trailing whitespace"), + (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"), (r'.{85}', "line too long"), (r'(while|if|do|for)\(', "use space after while/if/do/for"), (r'return\(', "return is not a function"), @@ -331,32 +333,54 @@ def checkfile(f, logfunc=_defaultlogger. else: pats = pats[0] # print post # uncomment to show filtered version - z = enumerate(zip(pre.splitlines(), post.splitlines(True))) + if debug: print "Checking %s for %s" % (name, f) - for n, l in z: - if "check-code" + "-ignore" in l[0]: - if debug: - print "Skipping %s for %s:%s (check-code -ignore)" % ( - name, f, n) - continue - for p, msg in pats: - if re.search(p, l[1]): - bd = "" - if blame: - bd = 'working directory' - if not blamecache: - blamecache = getblame(f) - if n < len(blamecache): - bl, bu, br = blamecache[n] - if bl == l[0]: - bd = '%s@%s' % (bu, br) - logfunc(f, n + 1, l[0], msg, bd) - fc += 1 - result = False + + prelines = None + errors = [] + for p, msg in pats: + pos = 0 + n = 0 + for m in re.finditer(p, post): + if prelines is None: + prelines = pre.splitlines() + postlines = post.splitlines(True) + + start = m.start() + while n < len(postlines): + step = len(postlines[n]) + if pos + step > start: + break + pos += step + n += 1 + l = prelines[n] + + if "check-code" + "-ignore" in l: + if debug: + print "Skipping %s for %s:%s (check-code -ignore)" % ( + name, f, n) + continue + bd = "" + if blame: + bd = 'working directory' + if not blamecache: + blamecache = getblame(f) + if n < len(blamecache): + bl, bu, br = blamecache[n] + if bl == l: + bd = '%s@%s' % (bu, br) + errors.append((f, n + 1, l, msg, bd)) + result = False + + errors.sort() + for e in errors: + logfunc(*e) + fc += 1 if maxerr is not None and fc >= maxerr: print " (too many errors, giving up)" break + return result if __name__ == "__main__": diff --git a/tests/test-check-code.t b/tests/test-check-code.t --- a/tests/test-check-code.t +++ b/tests/test-check-code.t @@ -50,8 +50,8 @@ Python keyword is not a function ./wrong.py:3: > return ( 5+6, 9) + gratuitous whitespace in () or [] missing whitespace in expression - gratuitous whitespace in () or [] ./quote.py:5: > '"""', 42+1, """and missing whitespace in expression