##// END OF EJS Templates
check-code: support multiline matches like try/except/finally...
Matt Mackall -
r15281:aeeb2afc stable
parent child Browse files
Show More
@@ -1,386 +1,410 b''
1 1 #!/usr/bin/env python
2 2 #
3 3 # check-code - a style and portability checker for Mercurial
4 4 #
5 5 # Copyright 2010 Matt Mackall <mpm@selenic.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 import re, glob, os, sys
11 11 import keyword
12 12 import optparse
13 13
def repquote(m):
    """Mask the contents of a quoted string while keeping its delimiters.

    ``m`` is a match object exposing the named groups ``quote`` and
    ``text``.  Word characters in the text become "x"; every remaining
    character that is not whitespace becomes "o".  Whitespace (including
    newlines) is left alone, so the masked text has the same length and
    line layout as the original.
    """
    quote = m.group('quote')
    masked = re.sub(r"\w", "x", m.group('text'))
    masked = re.sub(r"[^\s\nx]", "o", masked)
    return "".join([quote, masked, quote])
18 18
def reppython(m):
    """Mask one Python comment or string literal match.

    When the ``comment`` group matched, the whole comment is replaced by
    a run of "#" of equal length.  Otherwise the match is a quoted string
    and is delegated to repquote().
    """
    comment = m.group('comment')
    if not comment:
        return repquote(m)
    return "#" * len(comment)
24 24
def repcomment(m):
    """Replace a comment (group 2) by "#" characters of the same length.

    Group 1 (the text preceding the comment) is kept unchanged.
    """
    lead, comment = m.group(1), m.group(2)
    return lead + len(comment) * "#"
27 27
def repccomment(m):
    """Mask the body of a C block comment.

    Group 1 (the opener) is kept, the body in group 2 has every
    non-whitespace character -- and a space that directly follows a
    newline -- turned into "x", and the closing "*/" is re-appended.
    """
    body = re.sub(r"((?<=\n) )|\S", "x", m.group(2))
    return "%s%s*/" % (m.group(1), body)
31 31
def repcallspaces(m):
    """Strip the whitespace that follows each newline inside group 2.

    Group 1 is returned unchanged in front of the cleaned-up text, so
    continuation-line indentation no longer appears in the result.
    """
    head = m.group(1)
    tail = re.sub(r"\n\s+", "\n", m.group(2))
    return head + tail
35 35
def repinclude(m):
    """Replace what follows group 1 of an include match with "<foo>".

    Only group 1 of the match is kept; the rest of the matched text is
    discarded in favour of the fixed placeholder.
    """
    return "%s<foo>" % m.group(1)
38 38
def rephere(m):
    """Mask the text in group 2 of a here-document match.

    Every non-whitespace character of group 2 becomes "x"; group 1 is
    kept as-is and placed in front of the masked text.
    """
    masked = re.sub(r"\S", "x", m.group(2))
    return "".join((m.group(1), masked))
42 42
43 43
44 44 testpats = [
45 45 [
46 46 (r'(pushd|popd)', "don't use 'pushd' or 'popd', use 'cd'"),
47 (r'\W\$?\(\([^\)]*\)\)', "don't use (()) or $(()), use 'expr'"),
47 (r'\W\$?\(\([^\)\n]*\)\)', "don't use (()) or $(()), use 'expr'"),
48 48 (r'^function', "don't use 'function', use old style"),
49 49 (r'grep.*-q', "don't use 'grep -q', redirect to /dev/null"),
50 50 (r'echo.*\\n', "don't use 'echo \\n', use printf"),
51 51 (r'echo -n', "don't use 'echo -n', use printf"),
52 52 (r'^diff.*-\w*N', "don't use 'diff -N'"),
53 (r'(^| )wc[^|]*$', "filter wc output"),
53 (r'(^| )wc[^|\n]*$', "filter wc output"),
54 54 (r'head -c', "don't use 'head -c', use 'dd'"),
55 55 (r'ls.*-\w*R', "don't use 'ls -R', use 'find'"),
56 56 (r'printf.*\\\d\d\d', "don't use 'printf \NNN', use Python"),
57 57 (r'printf.*\\x', "don't use printf \\x, use Python"),
58 58 (r'\$\(.*\)', "don't use $(expr), use `expr`"),
59 59 (r'rm -rf \*', "don't use naked rm -rf, target a directory"),
60 (r'(^|\|\s*)grep (-\w\s+)*[^|]*[(|]\w',
60 (r'(^|\|\s*)grep (-\w\s+)*[^|\n]*[(|]\w',
61 61 "use egrep for extended grep syntax"),
62 62 (r'/bin/', "don't use explicit paths for tools"),
63 63 (r'\$PWD', "don't use $PWD, use `pwd`"),
64 64 (r'[^\n]\Z', "no trailing newline"),
65 65 (r'export.*=', "don't export and assign at once"),
66 ('^([^"\']|("[^"]*")|(\'[^\']*\'))*\\^', "^ must be quoted"),
66 ('^([^"\'\n]|("[^"\n]*")|(\'[^\'\n]*\'))*\\^', "^ must be quoted"),
67 67 (r'^source\b', "don't use 'source', use '.'"),
68 68 (r'touch -d', "don't use 'touch -d', use 'touch -t' instead"),
69 (r'ls\s+[^|-]+\s+-', "options to 'ls' must come before filenames"),
70 (r'[^>]>\s*\$HGRCPATH', "don't overwrite $HGRCPATH, append to it"),
69 (r'ls\s+[^|\n-]+\s+-', "options to 'ls' must come before filenames"),
70 (r'[^>\n]>\s*\$HGRCPATH', "don't overwrite $HGRCPATH, append to it"),
71 71 (r'stop\(\)', "don't use 'stop' as a shell function name"),
72 72 ],
73 73 # warnings
74 74 []
75 75 ]
76 76
77 77 testfilters = [
78 78 (r"( *)(#([^\n]*\S)?)", repcomment),
79 79 (r"<<(\S+)((.|\n)*?\n\1)", rephere),
80 80 ]
81 81
82 82 uprefix = r"^ \$ "
83 83 uprefixc = r"^ > "
84 84 utestpats = [
85 85 [
86 (r'^(\S| $ ).*(\S\s+|^\s+)\n', "trailing whitespace on non-output"),
86 (r'^(\S| $ ).*(\S[ \t]+|^[ \t]+)\n', "trailing whitespace on non-output"),
87 87 (uprefix + r'.*\|\s*sed', "use regex test output patterns instead of sed"),
88 88 (uprefix + r'(true|exit 0)', "explicit zero exit unnecessary"),
89 89 (uprefix + r'.*\$\?', "explicit exit code checks unnecessary"),
90 90 (uprefix + r'.*\|\| echo.*(fail|error)',
91 91 "explicit exit code checks unnecessary"),
92 92 (uprefix + r'set -e', "don't use set -e"),
93 93 (uprefixc + r'( *)\t', "don't use tabs to indent"),
94 94 ],
95 95 # warnings
96 96 []
97 97 ]
98 98
# Reuse every plain test-script pattern for unified tests: each one is
# re-anchored behind the command-prompt prefix.  A pattern that already
# starts with '^' loses that anchor first, since uprefix supplies its own.
for i in [0, 1]:
    for p, m in testpats[i]:
        if p.startswith('^'):
            p = p[1:]
        p = uprefix + p
        utestpats[i].append((p, m))
106 106
107 107 utestfilters = [
108 108 (r"( *)(#([^\n]*\S)?)", repcomment),
109 109 ]
110 110
111 111 pypats = [
112 112 [
113 113 (r'^\s*def\s*\w+\s*\(.*,\s*\(',
114 114 "tuple parameter unpacking not available in Python 3+"),
115 115 (r'lambda\s*\(.*,.*\)',
116 116 "tuple parameter unpacking not available in Python 3+"),
117 117 (r'(?<!def)\s+(cmp)\(', "cmp is not available in Python 3+"),
118 118 (r'\breduce\s*\(.*', "reduce is not available in Python 3+"),
119 119 (r'\.has_key\b', "dict.has_key is not available in Python 3+"),
120 120 (r'^\s*\t', "don't use tabs"),
121 121 (r'\S;\s*\n', "semicolon"),
122 122 (r'\w,\w', "missing whitespace after ,"),
123 123 (r'\w[+/*\-<>]\w', "missing whitespace in expression"),
124 (r'^\s+\w+=\w+[^,)]$', "missing whitespace in assignment"),
124 (r'^\s+\w+=\w+[^,)\n]$', "missing whitespace in assignment"),
125 (r'(?m)(\s+)try:\n((?:\n|\1\s.*\n)+?)\1except.*?:\n'
126 r'((?:\n|\1\s.*\n)+?)\1finally:', 'no try/except/finally in Py2.4'),
125 127 (r'.{85}', "line too long"),
126 128 (r'[^\n]\Z', "no trailing newline"),
127 (r'(\S\s+|^\s+)\n', "trailing whitespace"),
128 # (r'^\s+[^_ ][^_. ]+_[^_]+\s*=', "don't use underbars in identifiers"),
129 (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
130 # (r'^\s+[^_ \n][^_. \n]+_[^_\n]+\s*=', "don't use underbars in identifiers"),
129 131 # (r'\w*[a-z][A-Z]\w*\s*=', "don't use camelcase in identifiers"),
130 (r'^\s*(if|while|def|class|except|try)\s[^[]*:\s*[^\]#\s]+',
132 (r'^\s*(if|while|def|class|except|try)\s[^[\n]*:\s*[^\\n]#\s]+',
131 133 "linebreak after :"),
132 (r'class\s[^( ]+:', "old-style class, use class foo(object)"),
133 (r'class\s[^( ]+\(\):',
134 (r'class\s[^( \n]+:', "old-style class, use class foo(object)"),
135 (r'class\s[^( \n]+\(\):',
134 136 "class foo() not available in Python 2.4, use class foo(object)"),
135 137 (r'\b(%s)\(' % '|'.join(keyword.kwlist),
136 138 "Python keyword is not a function"),
137 139 (r',]', "unneeded trailing ',' in list"),
138 140 # (r'class\s[A-Z][^\(]*\((?!Exception)',
139 141 # "don't capitalize non-exception classes"),
140 142 # (r'in range\(', "use xrange"),
141 143 # (r'^\s*print\s+', "avoid using print in core and extensions"),
142 144 (r'[\x80-\xff]', "non-ASCII character literal"),
143 145 (r'("\')\.format\(', "str.format() not available in Python 2.4"),
144 146 (r'^\s*with\s+', "with not available in Python 2.4"),
145 147 (r'\.isdisjoint\(', "set.isdisjoint not available in Python 2.4"),
146 148 (r'^\s*except.* as .*:', "except as not available in Python 2.4"),
147 149 (r'^\s*os\.path\.relpath', "relpath not available in Python 2.4"),
148 150 (r'(?<!def)\s+(any|all|format)\(',
149 151 "any/all/format not available in Python 2.4"),
150 152 (r'(?<!def)\s+(callable)\(',
151 153 "callable not available in Python 3, use getattr(f, '__call__', None)"),
152 154 (r'if\s.*\selse', "if ... else form not available in Python 2.4"),
153 155 (r'^\s*(%s)\s\s' % '|'.join(keyword.kwlist),
154 156 "gratuitous whitespace after Python keyword"),
155 (r'([\(\[]\s\S)|(\S\s[\)\]])', "gratuitous whitespace in () or []"),
157 (r'([\(\[][ \t]\S)|(\S[ \t][\)\]])', "gratuitous whitespace in () or []"),
156 158 # (r'\s\s=', "gratuitous whitespace before ="),
157 159 (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=)\S',
158 160 "missing whitespace around operator"),
159 161 (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=)\s',
160 162 "missing whitespace around operator"),
161 163 (r'\s(\+=|-=|!=|<>|<=|>=|<<=|>>=)\S',
162 164 "missing whitespace around operator"),
163 165 (r'[^+=*/!<>&| -](\s=|=\s)[^= ]',
164 166 "wrong whitespace around ="),
165 167 (r'raise Exception', "don't raise generic exceptions"),
166 168 (r' is\s+(not\s+)?["\'0-9-]', "object comparison with literal"),
167 169 (r' [=!]=\s+(True|False|None)',
168 170 "comparison with singleton, use 'is' or 'is not' instead"),
169 171 (r'^\s*(while|if) [01]:',
170 172 "use True/False for constant Boolean expression"),
171 173 (r'(?<!def)\s+hasattr',
172 174 'hasattr(foo, bar) is broken, use util.safehasattr(foo, bar) instead'),
173 175 (r'opener\([^)]*\).read\(',
174 176 "use opener.read() instead"),
175 177 (r'opener\([^)]*\).write\(',
176 178 "use opener.write() instead"),
177 179 (r'[\s\(](open|file)\([^)]*\)\.read\(',
178 180 "use util.readfile() instead"),
179 181 (r'[\s\(](open|file)\([^)]*\)\.write\(',
180 182 "use util.readfile() instead"),
181 183 (r'^[\s\(]*(open(er)?|file)\([^)]*\)',
182 184 "always assign an opened file to a variable, and close it afterwards"),
183 185 (r'[\s\(](open|file)\([^)]*\)\.',
184 186 "always assign an opened file to a variable, and close it afterwards"),
185 187 (r'(?i)descendent', "the proper spelling is descendAnt"),
186 188 (r'\.debug\(\_', "don't mark debug messages for translation"),
187 189 ],
188 190 # warnings
189 191 [
190 192 (r'.{81}', "warning: line over 80 characters"),
191 193 (r'^\s*except:$', "warning: naked except clause"),
192 194 (r'ui\.(status|progress|write|note|warn)\([\'\"]x',
193 195 "warning: unwrapped ui message"),
194 196 ]
195 197 ]
196 198
197 199 pyfilters = [
198 200 (r"""(?msx)(?P<comment>\#.*?$)|
199 201 ((?P<quote>('''|\"\"\"|(?<!')'(?!')|(?<!")"(?!")))
200 202 (?P<text>(([^\\]|\\.)*?))
201 203 (?P=quote))""", reppython),
202 204 ]
203 205
204 206 cpats = [
205 207 [
206 208 (r'//', "don't use //-style comments"),
207 209 (r'^ ', "don't use spaces to indent"),
208 210 (r'\S\t', "don't use tabs except for indent"),
209 (r'(\S\s+|^\s+)\n', "trailing whitespace"),
211 (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
210 212 (r'.{85}', "line too long"),
211 213 (r'(while|if|do|for)\(', "use space after while/if/do/for"),
212 214 (r'return\(', "return is not a function"),
213 215 (r' ;', "no space before ;"),
214 216 (r'\w+\* \w+', "use int *foo, not int* foo"),
215 217 (r'\([^\)]+\) \w+', "use (int)foo, not (int) foo"),
216 218 (r'\S+ (\+\+|--)', "use foo++, not foo ++"),
217 219 (r'\w,\w', "missing whitespace after ,"),
218 220 (r'^[^#]\w[+/*]\w', "missing whitespace in expression"),
219 221 (r'^#\s+\w', "use #foo, not # foo"),
220 222 (r'[^\n]\Z', "no trailing newline"),
221 223 (r'^\s*#import\b', "use only #include in standard C code"),
222 224 ],
223 225 # warnings
224 226 []
225 227 ]
226 228
227 229 cfilters = [
228 230 (r'(/\*)(((\*(?!/))|[^*])*)\*/', repccomment),
229 231 (r'''(?P<quote>(?<!")")(?P<text>([^"]|\\")+)"(?!")''', repquote),
230 232 (r'''(#\s*include\s+<)([^>]+)>''', repinclude),
231 233 (r'(\()([^)]+\))', repcallspaces),
232 234 ]
233 235
234 236 inutilpats = [
235 237 [
236 238 (r'\bui\.', "don't use ui in util"),
237 239 ],
238 240 # warnings
239 241 []
240 242 ]
241 243
242 244 inrevlogpats = [
243 245 [
244 246 (r'\brepo\.', "don't use repo in revlog"),
245 247 ],
246 248 # warnings
247 249 []
248 250 ]
249 251
250 252 checks = [
251 253 ('python', r'.*\.(py|cgi)$', pyfilters, pypats),
252 254 ('test script', r'(.*/)?test-[^.~]*$', testfilters, testpats),
253 255 ('c', r'.*\.c$', cfilters, cpats),
254 256 ('unified test', r'.*\.t$', utestfilters, utestpats),
255 257 ('layering violation repo in revlog', r'mercurial/revlog\.py', pyfilters,
256 258 inrevlogpats),
257 259 ('layering violation ui in util', r'mercurial/util\.py', pyfilters,
258 260 inutilpats),
259 261 ]
260 262
class norepeatlogger(object):
    """Error reporter that shows each faulty line only once.

    Consecutive messages about the same (file, line number, line text)
    share a single location header.
    """

    def __init__(self):
        # (fname, lineno, line) of the location reported most recently
        self._lastseen = None

    def log(self, fname, lineno, line, msg, blame):
        """print an error related to a given line of a given file.

        The faulty line is printed too, but only once when several
        errors in a row concern the same line.

        :fname: filename
        :lineno: line number
        :line: actual content of the line
        :msg: error message
        :blame: blame information shown next to the location; a false
                value leaves it out
        """
        msgid = fname, lineno, line
        if msgid != self._lastseen:
            if blame:
                header = "%s:%d (%s):" % (fname, lineno, blame)
            else:
                header = "%s:%d:" % (fname, lineno)
            print(header)
            print(" > %s" % line)
            self._lastseen = msgid
        print(" " + msg)
285 287
286 288 _defaultlogger = norepeatlogger()
287 289
def getblame(f):
    """Return per-line annotate information for file ``f``.

    Runs ``hg annotate -un`` on ``f`` and returns a list with one
    ``(line, user, rev)`` tuple per output line, where ``line`` is the
    text after the first ':' with its first and last character removed.

    The command is started from an argument list instead of a shell
    string, so a filename containing spaces or shell metacharacters is
    passed through verbatim.  If the command cannot be started at all an
    empty list is returned, which matches the previous best-effort
    behaviour of reading an empty pipe.
    """
    # local import: keeps this block self-contained (subprocess is stdlib)
    import subprocess

    try:
        proc = subprocess.Popen(['hg', 'annotate', '-un', f],
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
    except OSError:
        # hg is not runnable: no blame data rather than a crash
        return []

    lines = []
    try:
        for l in proc.stdout:
            # each line is split at its first ':' into a "user rev" prefix
            # and the annotated text
            start, line = l.split(':', 1)
            user, rev = start.split()
            lines.append((line[1:-1], user, rev))
    finally:
        # always release the pipe and reap the child process
        proc.stdout.close()
        proc.wait()
    return lines
295 297
def checkfile(f, logfunc=_defaultlogger.log, maxerr=None, warnings=False,
              blame=False, debug=False):
    """checks style and portability of a given file

    :f: filepath
    :logfunc: function used to report error
              logfunc(filename, linenumber, linecontent, errormessage,
                      blameinfo)
    :maxerr: number of errors to display before aborting.
             Set to None (default) to report all errors
    :warnings: also apply the warning-level patterns of each check
    :blame: look up who last touched a faulty line (uses getblame())
    :debug: print what is being checked or skipped

    return True if no error is found, False otherwise.
    """
    blamecache = None
    result = True
    for name, match, filters, pats in checks:
        if debug:
            print("%s %s" % (name, f))
        fc = 0
        if not re.match(match, f):
            if debug:
                print("Skipping %s for %s it doesn't match %s" % (
                      name, f, match))
            continue
        fp = open(f)
        try:
            pre = post = fp.read()
        finally:
            fp.close()
        if "no-" + "check-code" in pre:
            if debug:
                print("Skipping %s for %s it has no- and check-code" % (
                      name, f))
            break
        # "post" is the filtered text the patterns run on; "pre" is kept
        # untouched so the original line can be shown in reports
        for p, r in filters:
            post = re.sub(p, r, post)
        if warnings:
            pats = pats[0] + pats[1]
        else:
            pats = pats[0]
        # print post # uncomment to show filtered version

        if debug:
            print("Checking %s for %s" % (name, f))

        prelines = None
        errors = []
        for p, msg in pats:
            # pos/n track the line that contains the current match; they
            # only move forward because finditer yields matches in order
            pos = 0
            n = 0
            # The patterns are applied to the whole file at once, so
            # MULTILINE is required for '^' and '$' to anchor at every
            # line instead of only at the start/end of the file.
            for m in re.finditer(p, post, re.MULTILINE):
                if prelines is None:
                    # split lazily: only needed once something matched
                    prelines = pre.splitlines()
                    postlines = post.splitlines(True)

                # advance to the line in which the match starts
                start = m.start()
                while n < len(postlines):
                    step = len(postlines[n])
                    if pos + step > start:
                        break
                    pos += step
                    n += 1
                l = prelines[n]

                if "check-code" + "-ignore" in l:
                    if debug:
                        print("Skipping %s for %s:%s (check-code -ignore)" % (
                              name, f, n))
                    continue
                bd = ""
                if blame:
                    bd = 'working directory'
                    if not blamecache:
                        blamecache = getblame(f)
                    if n < len(blamecache):
                        bl, bu, br = blamecache[n]
                        # only trust the annotation if the line is unchanged
                        if bl == l:
                            bd = '%s@%s' % (bu, br)
                errors.append((f, n + 1, l, msg, bd))
                result = False

        # patterns are tried one after another, so sort to report the
        # errors in file/line order
        errors.sort()
        for e in errors:
            logfunc(*e)
            fc += 1
            if maxerr is not None and fc >= maxerr:
                print(" (too many errors, giving up)")
                break

    return result
361 385
if __name__ == "__main__":
    # Command-line entry point: check every file named on the command
    # line (or every entry of the current directory when none is given)
    # and exit with status 1 if any of them has an error.
    parser = optparse.OptionParser("%prog [options] [files]")
    parser.add_option("-w", "--warnings", action="store_true",
                      help="include warning-level checks")
    parser.add_option("-p", "--per-file", type="int",
                      help="max warnings per file")
    parser.add_option("-b", "--blame", action="store_true",
                      help="use annotate to generate blame info")
    parser.add_option("", "--debug", action="store_true",
                      help="show debug information")

    parser.set_defaults(per_file=15, warnings=False, blame=False, debug=False)
    (options, args) = parser.parse_args()

    if len(args) == 0:
        check = glob.glob("*")
    else:
        check = args

    # Initialised once, before the loop: resetting it per file would let a
    # clean last file hide earlier failures, and would leave it undefined
    # when there is nothing to check.
    ret = 0
    for f in check:
        if not checkfile(f, maxerr=options.per_file, warnings=options.warnings,
                         blame=options.blame, debug=options.debug):
            ret = 1
    sys.exit(ret)
@@ -1,114 +1,114 b''
1 1 $ cat > correct.py <<EOF
2 2 > def toto(arg1, arg2):
3 3 > del arg2
4 4 > return (5 + 6, 9)
5 5 > EOF
6 6 $ cat > wrong.py <<EOF
7 7 > def toto( arg1, arg2):
8 8 > del(arg2)
9 9 > return ( 5+6, 9)
10 10 > EOF
11 11 $ cat > quote.py <<EOF
12 12 > # let's use quote in comments
13 13 > (''' ( 4x5 )
14 14 > but """\\''' and finally''',
15 15 > """let's fool checkpatch""", '1+2',
16 16 > '"""', 42+1, """and
17 17 > ( 4-1 ) """, "( 1+1 )\" and ")
18 18 > a, '\\\\\\\\', "\\\\\\" x-2", "c-1"
19 19 > EOF
20 20 $ cat > non-py24.py <<EOF
21 21 > # Using builtins that does not exist in Python 2.4
22 22 > if any():
23 23 > x = all()
24 24 > y = format(x)
25 25 >
26 26 > # Do not complain about our own definition
27 27 > def any(x):
28 28 > pass
29 29 > EOF
30 30 $ cat > classstyle.py <<EOF
31 31 > class newstyle_class(object):
32 32 > pass
33 33 >
34 34 > class oldstyle_class:
35 35 > pass
36 36 >
37 37 > class empty():
38 38 > pass
39 39 >
40 40 > no_class = 1:
41 41 > pass
42 42 > EOF
43 43 $ check_code="$TESTDIR"/../contrib/check-code.py
44 44 $ "$check_code" ./wrong.py ./correct.py ./quote.py ./non-py24.py ./classstyle.py
45 45 ./wrong.py:1:
46 46 > def toto( arg1, arg2):
47 47 gratuitous whitespace in () or []
48 48 ./wrong.py:2:
49 49 > del(arg2)
50 50 Python keyword is not a function
51 51 ./wrong.py:3:
52 52 > return ( 5+6, 9)
53 gratuitous whitespace in () or []
53 54 missing whitespace in expression
54 gratuitous whitespace in () or []
55 55 ./quote.py:5:
56 56 > '"""', 42+1, """and
57 57 missing whitespace in expression
58 58 ./non-py24.py:2:
59 59 > if any():
60 60 any/all/format not available in Python 2.4
61 61 ./non-py24.py:3:
62 62 > x = all()
63 63 any/all/format not available in Python 2.4
64 64 ./non-py24.py:4:
65 65 > y = format(x)
66 66 any/all/format not available in Python 2.4
67 67 ./classstyle.py:4:
68 68 > class oldstyle_class:
69 69 old-style class, use class foo(object)
70 70 ./classstyle.py:7:
71 71 > class empty():
72 72 class foo() not available in Python 2.4, use class foo(object)
73 73 [1]
74 74
75 75 $ cat > is-op.py <<EOF
76 76 > # is-operator comparing number or string literal
77 77 > x = None
78 78 > y = x is 'foo'
79 79 > y = x is "foo"
80 80 > y = x is 5346
81 81 > y = x is -6
82 82 > y = x is not 'foo'
83 83 > y = x is not "foo"
84 84 > y = x is not 5346
85 85 > y = x is not -6
86 86 > EOF
87 87
88 88 $ "$check_code" ./is-op.py
89 89 ./is-op.py:3:
90 90 > y = x is 'foo'
91 91 object comparison with literal
92 92 ./is-op.py:4:
93 93 > y = x is "foo"
94 94 object comparison with literal
95 95 ./is-op.py:5:
96 96 > y = x is 5346
97 97 object comparison with literal
98 98 ./is-op.py:6:
99 99 > y = x is -6
100 100 object comparison with literal
101 101 ./is-op.py:7:
102 102 > y = x is not 'foo'
103 103 object comparison with literal
104 104 ./is-op.py:8:
105 105 > y = x is not "foo"
106 106 object comparison with literal
107 107 ./is-op.py:9:
108 108 > y = x is not 5346
109 109 object comparison with literal
110 110 ./is-op.py:10:
111 111 > y = x is not -6
112 112 object comparison with literal
113 113 [1]
114 114
General Comments 0
You need to be logged in to leave comments. Login now