##// END OF EJS Templates
check-code: compile filters when loading
Simon Heimberg -
r19309:7d77fa1c default
parent child Browse files
Show More
@@ -1,495 +1,498 b''
1 1 #!/usr/bin/env python
2 2 #
3 3 # check-code - a style and portability checker for Mercurial
4 4 #
5 5 # Copyright 2010 Matt Mackall <mpm@selenic.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 import re, glob, os, sys
11 11 import keyword
12 12 import optparse
13 13
def repquote(m):
    """Mask the body of a quoted string, keeping its quotes and layout.

    :m: regex match exposing the named groups 'quote' and 'text'

    Word characters become "x" and every other non-whitespace character
    becomes "o".  Whitespace (newlines included) is kept, and each
    replacement is one character for one character, so the masked text has
    the same length and line structure as the original.
    """
    masked = re.sub(r"\w", "x", m.group('text'))
    masked = re.sub(r"[^\s\nx]", "o", masked)
    quote = m.group('quote')
    return quote + masked + quote
18 18
def reppython(m):
    """Mask a Python comment or string literal.

    :m: regex match with a 'comment' group plus the groups repquote() reads

    When the 'comment' group matched, its text is overwritten with "#"
    while any trailing whitespace is kept as is.  Otherwise the match is
    handed to repquote().
    """
    comment = m.group('comment')
    if not comment:
        return repquote(m)
    # length of the comment without its trailing whitespace
    body = len(comment.rstrip())
    return "#" * body + comment[body:]
25 25
def repcomment(m):
    """Overwrite a shell comment with "#" characters.

    :m: regex match whose group 1 is the text before the comment and
        whose group 2 is the comment itself

    Returns group 1 followed by as many "#" as group 2 is long.
    """
    prefix, comment = m.group(1), m.group(2)
    return prefix + "#" * len(comment)
28 28
def repccomment(m):
    """Mask the body of a C block comment.

    :m: regex match whose group 1 is the comment opener and whose group 2
        is the comment body

    Every non-whitespace character of the body becomes "x", and so does a
    single space that directly follows a newline.  The result is the opener,
    the masked body and a closing "*/".
    """
    masked = re.sub(r"((?<=\n) )|\S", "x", m.group(2))
    return m.group(1) + masked + "*/"
32 32
def repcallspaces(m):
    """Drop the indentation of continuation lines inside parentheses.

    :m: regex match whose group 1 is the opening text and whose group 2 is
        the text in which line breaks are normalised

    Each newline followed by whitespace in group 2 is reduced to a bare
    newline; group 1 is returned unchanged in front of it.
    """
    flattened = re.sub(r"\n\s+", "\n", m.group(2))
    return m.group(1) + flattened
36 36
def repinclude(m):
    """Replace the target of an include directive with a placeholder.

    :m: regex match whose group 1 is the text to keep

    Returns group 1 followed by the literal "<foo>".  Nothing else from
    the match is kept.
    """
    return m.group(1) + "<foo>"
39 39
def rephere(m):
    """Mask the non-whitespace characters of a here-document body.

    :m: regex match whose group 1 is the text to keep and whose group 2 is
        the text to mask

    Every non-whitespace character of group 2 becomes "x"; the result is
    group 1 followed by the masked text.
    """
    masked = re.sub(r"\S", "x", m.group(2))
    return m.group(1) + masked
43 43
44 44
# Patterns checked against shell test scripts (the 'test script' entry of
# `checks`).  Index 0 holds the patterns that are always applied, index 1 the
# ones that are only applied when warnings are requested.  Each entry is
# (regex, message).
45 45 testpats = [
46 46 [
47 47 (r'pushd|popd', "don't use 'pushd' or 'popd', use 'cd'"),
48 48 (r'\W\$?\(\([^\)\n]*\)\)', "don't use (()) or $(()), use 'expr'"),
49 49 (r'grep.*-q', "don't use 'grep -q', redirect to /dev/null"),
50 50 (r'sed.*-i', "don't use 'sed -i', use a temporary file"),
51 51 (r'\becho\b.*\\n', "don't use 'echo \\n', use printf"),
52 52 (r'echo -n', "don't use 'echo -n', use printf"),
53 53 (r'(^| )wc[^|]*$\n(?!.*\(re\))', "filter wc output"),
54 54 (r'head -c', "don't use 'head -c', use 'dd'"),
55 55 (r'sha1sum', "don't use sha1sum, use $TESTDIR/md5sum.py"),
56 56 (r'ls.*-\w*R', "don't use 'ls -R', use 'find'"),
57 57 (r'printf.*\\([1-9]|0\d)', "don't use 'printf \NNN', use Python"),
58 58 (r'printf.*\\x', "don't use printf \\x, use Python"),
59 59 (r'\$\(.*\)', "don't use $(expr), use `expr`"),
60 60 (r'rm -rf \*', "don't use naked rm -rf, target a directory"),
61 61 (r'(^|\|\s*)grep (-\w\s+)*[^|]*[(|]\w',
62 62 "use egrep for extended grep syntax"),
63 63 (r'/bin/', "don't use explicit paths for tools"),
64 64 (r'[^\n]\Z', "no trailing newline"),
65 65 (r'export.*=', "don't export and assign at once"),
66 66 (r'^source\b', "don't use 'source', use '.'"),
67 67 (r'touch -d', "don't use 'touch -d', use 'touch -t' instead"),
68 68 (r'ls +[^|\n-]+ +-', "options to 'ls' must come before filenames"),
69 69 (r'[^>\n]>\s*\$HGRCPATH', "don't overwrite $HGRCPATH, append to it"),
70 70 (r'^stop\(\)', "don't use 'stop' as a shell function name"),
71 71 (r'(\[|\btest\b).*-e ', "don't use 'test -e', use 'test -f'"),
72 72 (r'^alias\b.*=', "don't use alias, use a function"),
73 73 (r'if\s*!', "don't use '!' to negate exit status"),
74 74 (r'/dev/u?random', "don't use entropy, use /dev/zero"),
75 75 (r'do\s*true;\s*done', "don't use true as loop body, use sleep 0"),
76 76 (r'^( *)\t', "don't use tabs to indent"),
77 77 (r'sed (-e )?\'(\d+|/[^/]*/)i(?!\\\n)',
78 78 "put a backslash-escaped newline after sed 'i' command"),
79 79 ],
80 80 # warnings
81 81 [
82 82 (r'^function', "don't use 'function', use old style"),
83 83 (r'^diff.*-\w*N', "don't use 'diff -N'"),
84 84 (r'\$PWD|\${PWD}', "don't use $PWD, use `pwd`"),
85 85 (r'^([^"\'\n]|("[^"\n]*")|(\'[^\'\n]*\'))*\^', "^ must be quoted"),
86 86 (r'kill (`|\$\()', "don't use kill, use killdaemons.py")
87 87 ]
88 88 ]
89 89
# Substitutions run over a shell test script before the patterns are matched:
# comments are masked by repcomment, here-document bodies by rephere.
90 90 testfilters = [
91 91 (r"( *)(#([^\n]*\S)?)", repcomment),
92 92 (r"<<(\S+)((.|\n)*?\n\1)", rephere),
93 93 ]
94 94
# Patterns for unified tests (the 'unified test' entry of `checks`), laid out
# like testpats: index 0 is always applied, index 1 only with warnings.  A
# third tuple element, when present, is a regex that suppresses the report
# when it matches the offending line.
# NOTE(review): the runs of blanks inside these patterns may have been
# collapsed when this text was extracted -- confirm against the repository.
95 95 winglobmsg = "use (glob) to match Windows paths too"
96 96 uprefix = r"^ \$ "
97 97 utestpats = [
98 98 [
99 99 (r'^(\S.*|| [$>] .*)[ \t]\n', "trailing whitespace on non-output"),
100 100 (uprefix + r'.*\|\s*sed[^|>\n]*\n',
101 101 "use regex test output patterns instead of sed"),
102 102 (uprefix + r'(true|exit 0)', "explicit zero exit unnecessary"),
103 103 (uprefix + r'.*(?<!\[)\$\?', "explicit exit code checks unnecessary"),
104 104 (uprefix + r'.*\|\| echo.*(fail|error)',
105 105 "explicit exit code checks unnecessary"),
106 106 (uprefix + r'set -e', "don't use set -e"),
107 107 (uprefix + r'\s', "don't indent commands, use > for continued lines"),
108 108 (r'^ saved backup bundle to \$TESTTMP.*\.hg$', winglobmsg),
109 109 (r'^ changeset .* references (corrupted|missing) \$TESTTMP/.*[^)]$',
110 110 winglobmsg),
111 111 (r'^ pulling from \$TESTTMP/.*[^)]$', winglobmsg, '\$TESTTMP/unix-repo$'),
112 112 (r'^ reverting .*/.*[^)]$', winglobmsg, '\$TESTTMP/unix-repo$'),
113 113 (r'^ cloning subrepo \S+/.*[^)]$', winglobmsg, '\$TESTTMP/unix-repo$'),
114 114 (r'^ pushing to \$TESTTMP/.*[^)]$', winglobmsg, '\$TESTTMP/unix-repo$'),
115 115 (r'^ pushing subrepo \S+/\S+ to.*[^)]$', winglobmsg,
116 116 '\$TESTTMP/unix-repo$'),
117 117 (r'^ moving \S+/.*[^)]$', winglobmsg),
118 118 (r'^ no changes made to subrepo since.*/.*[^)]$',
119 119 winglobmsg, '\$TESTTMP/unix-repo$'),
120 120 (r'^ .*: largefile \S+ not available from file:.*/.*[^)]$',
121 121 winglobmsg, '\$TESTTMP/unix-repo$'),
122 122 ],
123 123 # warnings
124 124 [
125 125 (r'^ [^*?/\n]* \(glob\)$',
126 126 "warning: glob match with no glob character (?*/)"),
127 127 ]
128 128 ]
129 129
# Every shell-script pattern is also applied to the command lines of unified
# tests, which carry a "$" or ">" prompt in front of the command.  A pattern
# anchored with "^" must match right after the prompt; any other pattern may
# match anywhere on the command line.
# NOTE(review): the blanks inside the two prefix strings may have been
# collapsed when this text was extracted -- confirm against the repository.
for i in [0, 1]:
    for p, m in testpats[i]:
        if p.startswith(r'^'):
            p = r"^ [$>] (%s)" % p[1:]
        else:
            p = r"^ [$>] .*(%s)" % p
        utestpats[i].append((p, m))
137 137
# Substitutions run over a unified test before matching.  The here-document
# rule expects the terminator after a " > " continuation prefix; comments are
# masked by repcomment.
138 138 utestfilters = [
139 139 (r"<<(\S+)((.|\n)*?\n > \1)", rephere),
140 140 (r"( *)(#([^\n]*\S)?)", repcomment),
141 141 ]
142 142
# Patterns checked against Python sources (the 'python' entry of `checks`).
# Index 0 holds the patterns that are always applied, index 1 the (currently
# empty) warning-level ones.  Each entry is (regex, message) or
# (regex, message, ignore-regex); a match is not reported when the
# ignore-regex matches the offending line.  The patterns are matched against
# the filtered text, in which string bodies have been masked with "x"/"o".
pypats = [
  [
    (r'^\s*def\s*\w+\s*\(.*,\s*\(',
     "tuple parameter unpacking not available in Python 3+"),
    (r'lambda\s*\(.*,.*\)',
     "tuple parameter unpacking not available in Python 3+"),
    (r'(?<!def)\s+(cmp)\(', "cmp is not available in Python 3+"),
    (r'\breduce\s*\(.*', "reduce is not available in Python 3+"),
    (r'\.has_key\b', "dict.has_key is not available in Python 3+"),
    (r'\s<>\s', '<> operator is not available in Python 3+, use !='),
    (r'^\s*\t', "don't use tabs"),
    (r'\S;\s*\n', "semicolon"),
    (r'[^_]_\("[^"]+"\s*%', "don't use % inside _()"),
    (r"[^_]_\('[^']+'\s*%", "don't use % inside _()"),
    (r'(\w|\)),\w', "missing whitespace after ,"),
    (r'(\w|\))[+/*\-<>]\w', "missing whitespace in expression"),
    (r'^\s+(\w|\.)+=\w[^,()\n]*$', "missing whitespace in assignment"),
    (r'(\s+)try:\n((?:\n|\1\s.*\n)+?)\1except.*?:\n'
     r'((?:\n|\1\s.*\n)+?)\1finally:',
     'no try/except/finally in Python 2.4'),
    (r'(\s+)try:\n((?:\n|\1\s.*\n)*?)\1\s*yield\b.*?'
     r'((?:\n|\1\s.*\n)+?)\1finally:',
     'no yield inside try/finally in Python 2.4'),
    (r'.{81}', "line too long"),
    (r' x+[xo][\'"]\n\s+[\'"]x', 'string join across lines with no space'),
    (r'[^\n]\Z', "no trailing newline"),
    (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
#    (r'^\s+[^_ \n][^_. \n]+_[^_\n]+\s*=',
#     "don't use underbars in identifiers"),
    (r'^\s+(self\.)?[A-za-z][a-z0-9]+[A-Z]\w* = ',
     "don't use camelcase in identifiers"),
    (r'^\s*(if|while|def|class|except|try)\s[^[\n]*:\s*[^\\n]#\s]+',
     "linebreak after :"),
    (r'class\s[^( \n]+:', "old-style class, use class foo(object)"),
    (r'class\s[^( \n]+\(\):',
     "class foo() not available in Python 2.4, use class foo(object)"),
    (r'\b(%s)\(' % '|'.join(keyword.kwlist),
     "Python keyword is not a function"),
    (r',]', "unneeded trailing ',' in list"),
#    (r'class\s[A-Z][^\(]*\((?!Exception)',
#     "don't capitalize non-exception classes"),
#    (r'in range\(', "use xrange"),
#    (r'^\s*print\s+', "avoid using print in core and extensions"),
    (r'[\x80-\xff]', "non-ASCII character literal"),
    # a character class: either kind of closing quote may precede .format(
    (r'["\']\.format\(', "str.format() not available in Python 2.4"),
    (r'^\s*with\s+', "with not available in Python 2.4"),
    (r'\.isdisjoint\(', "set.isdisjoint not available in Python 2.4"),
    (r'^\s*except.* as .*:', "except as not available in Python 2.4"),
    (r'^\s*os\.path\.relpath', "relpath not available in Python 2.4"),
    (r'(?<!def)\s+(any|all|format)\(',
     "any/all/format not available in Python 2.4"),
    (r'(?<!def)\s+(callable)\(',
     "callable not available in Python 3, use getattr(f, '__call__', None)"),
    (r'if\s.*\selse', "if ... else form not available in Python 2.4"),
    (r'^\s*(%s)\s\s' % '|'.join(keyword.kwlist),
     "gratuitous whitespace after Python keyword"),
    (r'([\(\[][ \t]\S)|(\S[ \t][\)\]])', "gratuitous whitespace in () or []"),
#    (r'\s\s=', "gratuitous whitespace before ="),
    (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\S',
     "missing whitespace around operator"),
    (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\s',
     "missing whitespace around operator"),
    (r'\s(\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\S',
     "missing whitespace around operator"),
    (r'[^^+=*/!<>&| %-](\s=|=\s)[^= ]',
     "wrong whitespace around ="),
    (r'raise Exception', "don't raise generic exceptions"),
    (r'raise [^,(]+, (\([^\)]+\)|[^,\(\)]+)$',
     "don't use old-style two-argument raise, use Exception(message)"),
    (r' is\s+(not\s+)?["\'0-9-]', "object comparison with literal"),
    (r' [=!]=\s+(True|False|None)',
     "comparison with singleton, use 'is' or 'is not' instead"),
    (r'^\s*(while|if) [01]:',
     "use True/False for constant Boolean expression"),
    (r'(?:(?<!def)\s+|\()hasattr',
     'hasattr(foo, bar) is broken, use util.safehasattr(foo, bar) instead'),
    (r'opener\([^)]*\).read\(',
     "use opener.read() instead"),
    (r'BaseException', 'not in Python 2.4, use Exception'),
    (r'os\.path\.relpath', 'os.path.relpath is not in Python 2.5'),
    (r'opener\([^)]*\).write\(',
     "use opener.write() instead"),
    (r'[\s\(](open|file)\([^)]*\)\.read\(',
     "use util.readfile() instead"),
    (r'[\s\(](open|file)\([^)]*\)\.write\(',
     "use util.writefile() instead"),
    (r'^[\s\(]*(open(er)?|file)\([^)]*\)',
     "always assign an opened file to a variable, and close it afterwards"),
    (r'[\s\(](open|file)\([^)]*\)\.',
     "always assign an opened file to a variable, and close it afterwards"),
    (r'(?i)descendent', "the proper spelling is descendAnt"),
    (r'\.debug\(\_', "don't mark debug messages for translation"),
    (r'\.strip\(\)\.split\(\)', "no need to strip before splitting"),
    (r'^\s*except\s*:', "naked except clause", r'#.*re-raises'),
    # whole groups of four blanks are fine; one to three extra blanks are not
    (r':\n(    )*( ){1,3}[^ ]', "must indent 4 spaces"),
    (r'ui\.(status|progress|write|note|warn)\([\'\"]x',
     "missing _() in ui message (use () to hide false-positives)"),
    (r'release\(.*wlock, .*lock\)', "wrong lock release order"),
  ],
  # warnings
  [
  ]
]
245 245
# A single regex (dot-all, multi-line, verbose) that matches either a comment
# or a string literal; reppython masks whichever of the two matched.
246 246 pyfilters = [
247 247 (r"""(?msx)(?P<comment>\#.*?$)|
248 248 ((?P<quote>('''|\"\"\"|(?<!')'(?!')|(?<!")"(?!")))
249 249 (?P<text>(([^\\]|\\.)*?))
250 250 (?P=quote))""", reppython),
251 251 ]
252 252
# Text files (the 'txt' entry of `checks`) are matched unfiltered against a
# single failure pattern; there are no warning-level patterns.
253 253 txtfilters = []
254 254
255 255 txtpats = [
256 256 [
257 257 ('\s$', 'trailing whitespace'),
258 258 ],
259 259 []
260 260 ]
261 261
# Patterns checked against C sources (the 'c' entry of `checks`): failures
# first, followed by an empty warnings list.
# NOTE(review): the blanks inside these patterns may have been collapsed when
# this text was extracted -- confirm against the repository.
262 262 cpats = [
263 263 [
264 264 (r'//', "don't use //-style comments"),
265 265 (r'^ ', "don't use spaces to indent"),
266 266 (r'\S\t', "don't use tabs except for indent"),
267 267 (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
268 268 (r'.{81}', "line too long"),
269 269 (r'(while|if|do|for)\(', "use space after while/if/do/for"),
270 270 (r'return\(', "return is not a function"),
271 271 (r' ;', "no space before ;"),
272 272 (r'\w+\* \w+', "use int *foo, not int* foo"),
273 273 (r'\([^\)]+\) \w+', "use (int)foo, not (int) foo"),
274 274 (r'\w+ (\+\+|--)', "use foo++, not foo ++"),
275 275 (r'\w,\w', "missing whitespace after ,"),
276 276 (r'^[^#]\w[+/*]\w', "missing whitespace in expression"),
277 277 (r'^#\s+\w', "use #foo, not # foo"),
278 278 (r'[^\n]\Z', "no trailing newline"),
279 279 (r'^\s*#import\b', "use only #include in standard C code"),
280 280 ],
281 281 # warnings
282 282 []
283 283 ]
284 284
# Substitutions run over a C source before matching, in this order: block
# comment bodies (repccomment), string literals (repquote), #include <...>
# targets (repinclude) and line breaks inside parentheses (repcallspaces).
285 285 cfilters = [
286 286 (r'(/\*)(((\*(?!/))|[^*])*)\*/', repccomment),
287 287 (r'''(?P<quote>(?<!")")(?P<text>([^"]|\\")+)"(?!")''', repquote),
288 288 (r'''(#\s*include\s+<)([^>]+)>''', repinclude),
289 289 (r'(\()([^)]+\))', repcallspaces),
290 290 ]
291 291
# Layering checks.  `checks` pairs inutilpats with mercurial/util.py and
# inrevlogpats with mercurial/revlog.py; both have an empty warnings list.
292 292 inutilpats = [
293 293 [
294 294 (r'\bui\.', "don't use ui in util"),
295 295 ],
296 296 # warnings
297 297 []
298 298 ]
299 299
300 300 inrevlogpats = [
301 301 [
302 302 (r'\brepo\.', "don't use repo in revlog"),
303 303 ],
304 304 # warnings
305 305 []
306 306 ]
307 307
# One entry per kind of check: (name, filename regex, filters, patterns).
# checkfile() applies every entry whose filename regex matches (re.match) the
# path it is given, so a single file can be covered by several entries.
308 308 checks = [
309 309 ('python', r'.*\.(py|cgi)$', pyfilters, pypats),
310 310 ('test script', r'(.*/)?test-[^.~]*$', testfilters, testpats),
311 311 ('c', r'.*\.c$', cfilters, cpats),
312 312 ('unified test', r'.*\.t$', utestfilters, utestpats),
313 313 ('layering violation repo in revlog', r'mercurial/revlog\.py', pyfilters,
314 314 inrevlogpats),
315 315 ('layering violation ui in util', r'mercurial/util\.py', pyfilters,
316 316 inutilpats),
317 317 ('txt', r'.*\.txt$', txtfilters, txtpats),
318 318 ]
319 319
320 320 def _preparepats():
321 321 for c in checks:
322 322 failandwarn = c[-1]
323 323 for pats in failandwarn:
324 324 for i, pseq in enumerate(pats):
325 325 # fix-up regexes for multi-line searches
326 326 po = p = pseq[0]
327 327 # \s doesn't match \n
328 328 p = re.sub(r'(?<!\\)\\s', r'[ \\t]', p)
329 329 # [^...] doesn't match newline
330 330 p = re.sub(r'(?<!\\)\[\^', r'[^\\n', p)
331 331
332 332 #print po, '=>', p
333 333 pats[i] = (re.compile(p, re.MULTILINE),) + pseq[1:]
334 filters = c[2]
335 for i, flt in enumerate(filters):
336 filters[i] = re.compile(flt[0]), flt[1]
# Compile all patterns once, when the module is loaded; checkfile() calls
# finditer() on them directly.
334 337 _preparepats()
335 338
336 339 class norepeatlogger(object):
337 340 def __init__(self):
338 341 self._lastseen = None
339 342
340 343 def log(self, fname, lineno, line, msg, blame):
341 344 """print error related a to given line of a given file.
342 345
343 346 The faulty line will also be printed but only once in the case
344 347 of multiple errors.
345 348
346 349 :fname: filename
347 350 :lineno: line number
348 351 :line: actual content of the line
349 352 :msg: error message
350 353 """
351 354 msgid = fname, lineno, line
352 355 if msgid != self._lastseen:
353 356 if blame:
354 357 print "%s:%d (%s):" % (fname, lineno, blame)
355 358 else:
356 359 print "%s:%d:" % (fname, lineno)
357 360 print " > %s" % line
358 361 self._lastseen = msgid
359 362 print " " + msg
360 363
361 364 _defaultlogger = norepeatlogger()
362 365
def getblame(f):
    """Return annotate information for every line of a file.

    :f: filepath handed to 'hg annotate -un'

    Returns a list holding one (line content, user, revision) tuple per
    line of output.  The list is empty when 'hg' cannot be started.

    The command is run without a shell and with the path as a separate
    argument, so paths containing blanks or shell metacharacters are
    passed through literally.
    """
    import subprocess
    try:
        proc = subprocess.Popen(['hg', 'annotate', '-un', '--', f],
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
    except OSError:
        # no usable 'hg' executable: same result as an empty annotate
        return []
    lines = []
    try:
        for l in proc.stdout:
            start, line = l.split(':', 1)
            user, rev = start.split()
            # drop the blank after the colon and the trailing newline
            lines.append((line[1:-1], user, rev))
    finally:
        proc.stdout.close()
        proc.wait()
    return lines
370 373
371 374 def checkfile(f, logfunc=_defaultlogger.log, maxerr=None, warnings=False,
372 375 blame=False, debug=False, lineno=True):
373 376 """checks style and portability of a given file
374 377
375 378 :f: filepath
376 379 :logfunc: function used to report error
377 380 logfunc(filename, linenumber, linecontent, errormessage)
378 381 :maxerr: number of error to display before aborting.
379 382 Set to false (default) to report all errors
380 383
381 384 return True if no error is found, False otherwise.
382 385 """
383 386 blamecache = None
384 387 result = True
385 388 for name, match, filters, pats in checks:
386 389 if debug:
387 390 print name, f
388 391 fc = 0
389 392 if not re.match(match, f):
390 393 if debug:
391 394 print "Skipping %s for %s it doesn't match %s" % (
392 395 name, match, f)
393 396 continue
394 397 fp = open(f)
395 398 pre = post = fp.read()
396 399 fp.close()
397 400 if "no-" + "check-code" in pre:
398 401 if debug:
399 402 print "Skipping %s for %s it has no- and check-code" % (
400 403 name, f)
401 404 break
402 405 for p, r in filters:
403 406 post = re.sub(p, r, post)
404 407 if warnings:
405 408 pats = pats[0] + pats[1]
406 409 else:
407 410 pats = pats[0]
408 411 # print post # uncomment to show filtered version
409 412
410 413 if debug:
411 414 print "Checking %s for %s" % (name, f)
412 415
413 416 prelines = None
414 417 errors = []
415 418 for pat in pats:
416 419 if len(pat) == 3:
417 420 p, msg, ignore = pat
418 421 else:
419 422 p, msg = pat
420 423 ignore = None
421 424
422 425 pos = 0
423 426 n = 0
424 427 for m in p.finditer(post):
425 428 if prelines is None:
426 429 prelines = pre.splitlines()
427 430 postlines = post.splitlines(True)
428 431
429 432 start = m.start()
430 433 while n < len(postlines):
431 434 step = len(postlines[n])
432 435 if pos + step > start:
433 436 break
434 437 pos += step
435 438 n += 1
436 439 l = prelines[n]
437 440
438 441 if "check-code" + "-ignore" in l:
439 442 if debug:
440 443 print "Skipping %s for %s:%s (check-code -ignore)" % (
441 444 name, f, n)
442 445 continue
443 446 elif ignore and re.search(ignore, l, re.MULTILINE):
444 447 continue
445 448 bd = ""
446 449 if blame:
447 450 bd = 'working directory'
448 451 if not blamecache:
449 452 blamecache = getblame(f)
450 453 if n < len(blamecache):
451 454 bl, bu, br = blamecache[n]
452 455 if bl == l:
453 456 bd = '%s@%s' % (bu, br)
454 457 errors.append((f, lineno and n + 1, l, msg, bd))
455 458 result = False
456 459
457 460 errors.sort()
458 461 for e in errors:
459 462 logfunc(*e)
460 463 fc += 1
461 464 if maxerr and fc >= maxerr:
462 465 print " (too many errors, giving up)"
463 466 break
464 467
465 468 return result
466 469
# Command-line entry point: check the files named on the command line, or
# everything matched by "*" in the current directory when none are given.
# The exit status is 1 when checkfile() reports a failure for any file.
if __name__ == "__main__":
    parser = optparse.OptionParser("%prog [options] [files]")
    parser.add_option("-w", "--warnings", action="store_true",
                      help="include warning-level checks")
    parser.add_option("-p", "--per-file", type="int",
                      help="max warnings per file")
    parser.add_option("-b", "--blame", action="store_true",
                      help="use annotate to generate blame info")
    parser.add_option("", "--debug", action="store_true",
                      help="show debug information")
    parser.add_option("", "--nolineno", action="store_false",
                      dest='lineno', help="don't show line numbers")

    parser.set_defaults(per_file=15, warnings=False, blame=False, debug=False,
                        lineno=True)
    (options, args) = parser.parse_args()

    if not args:
        check = glob.glob("*")
    else:
        check = args

    ret = 0
    for f in check:
        if not checkfile(f, maxerr=options.per_file, warnings=options.warnings,
                         blame=options.blame, debug=options.debug,
                         lineno=options.lineno):
            ret = 1
    sys.exit(ret)
General Comments 0
You need to be logged in to leave comments. Login now