##// END OF EJS Templates
check-code: check txt files for trailing whitespace
Mads Kiilerich -
r18960:170fc094 default
parent child Browse files
Show More
@@ -1,465 +1,475
1 1 #!/usr/bin/env python
2 2 #
3 3 # check-code - a style and portability checker for Mercurial
4 4 #
5 5 # Copyright 2010 Matt Mackall <mpm@selenic.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 import re, glob, os, sys
11 11 import keyword
12 12 import optparse
13 13
def repquote(m):
    """Mask the contents of a quoted string, keeping quotes and layout.

    ``m`` is a match object with named groups ``quote`` and ``text``.
    Word characters in the text become "x"; anything else that is not
    whitespace becomes "o".  The quote characters are kept unchanged on
    both sides.
    """
    quote = m.group('quote')
    # Order matters: letters are turned into "x" first so that the second
    # substitution only rewrites the remaining punctuation.
    masked = re.sub(r"[^\s\nx]", "o", re.sub(r"\w", "x", m.group('text')))
    return quote + masked + quote
18 18
def reppython(m):
    """Mask a Python comment or string literal matched by ``m``.

    When the ``comment`` group matched, every character up to the last
    non-blank one is replaced by "#" and trailing whitespace is kept.
    Otherwise the match is a quoted string and is handed to repquote().
    """
    text = m.group('comment')
    if not text:
        return repquote(m)
    width = len(text.rstrip())
    return "#" * width + text[width:]
25 25
def repcomment(m):
    """Return group 1 followed by one "#" per character of group 2."""
    lead, comment = m.group(1), m.group(2)
    return lead + len(comment) * "#"
28 28
def repccomment(m):
    """Mask the body of a C comment while keeping its line structure.

    Group 1 is emitted unchanged, group 2 is masked and a closing "*/" is
    appended.  In the body every non-whitespace character becomes "x", and
    so does a single space that directly follows a newline.
    """
    opener = m.group(1)
    body = re.sub(r"((?<=\n) )|\S", "x", m.group(2))
    return "".join([opener, body, "*/"])
32 32
def repcallspaces(m):
    """Drop the indentation of continuation lines inside group 2.

    Every newline followed by whitespace in group 2 is reduced to a bare
    newline; group 1 is returned in front of the result unchanged.
    """
    head, tail = m.group(1), m.group(2)
    return head + re.sub(r"\n\s+", "\n", tail)
36 36
def repinclude(m):
    """Replace the included file name with the placeholder "<foo>".

    Whatever group 1 captured is kept verbatim and the literal "<foo>" is
    appended to it; the rest of the match is discarded.
    """
    return "%s<foo>" % m.group(1)
39 39
def rephere(m):
    """Mask the body of a here-document.

    Every non-whitespace character of group 2 becomes "x" (whitespace and
    newlines survive), and the result is appended to group 1.
    """
    masked = re.sub(r"\S", "x", m.group(2))
    return "".join((m.group(1), masked))
43 43
44 44
# Checks for shell test scripts.
#
# testpats[0] holds the error-level checks and testpats[1] the
# warning-level ones.  Each entry is a (regex, message) pair; checkfile()
# reports the message for every place the regex matches in the filtered
# file content.
testpats = [
  [
    (r'pushd|popd', "don't use 'pushd' or 'popd', use 'cd'"),
    (r'\W\$?\(\([^\)\n]*\)\)', "don't use (()) or $(()), use 'expr'"),
    (r'grep.*-q', "don't use 'grep -q', redirect to /dev/null"),
    (r'sed.*-i', "don't use 'sed -i', use a temporary file"),
    (r'\becho\b.*\\n', "don't use 'echo \\n', use printf"),
    (r'echo -n', "don't use 'echo -n', use printf"),
    (r'(^| )wc[^|]*$\n(?!.*\(re\))', "filter wc output"),
    (r'head -c', "don't use 'head -c', use 'dd'"),
    (r'sha1sum', "don't use sha1sum, use $TESTDIR/md5sum.py"),
    (r'ls.*-\w*R', "don't use 'ls -R', use 'find'"),
    # The backslash is written as "\\" so the literal does not depend on
    # "\N" being left alone by the parser; the message text is unchanged.
    (r'printf.*\\([1-9]|0\d)', "don't use 'printf \\NNN', use Python"),
    (r'printf.*\\x', "don't use printf \\x, use Python"),
    (r'\$\(.*\)', "don't use $(expr), use `expr`"),
    (r'rm -rf \*', "don't use naked rm -rf, target a directory"),
    (r'(^|\|\s*)grep (-\w\s+)*[^|]*[(|]\w',
     "use egrep for extended grep syntax"),
    (r'/bin/', "don't use explicit paths for tools"),
    (r'[^\n]\Z', "no trailing newline"),
    (r'export.*=', "don't export and assign at once"),
    (r'^source\b', "don't use 'source', use '.'"),
    (r'touch -d', "don't use 'touch -d', use 'touch -t' instead"),
    (r'ls +[^|\n-]+ +-', "options to 'ls' must come before filenames"),
    (r'[^>\n]>\s*\$HGRCPATH', "don't overwrite $HGRCPATH, append to it"),
    (r'^stop\(\)', "don't use 'stop' as a shell function name"),
    (r'(\[|\btest\b).*-e ', "don't use 'test -e', use 'test -f'"),
    (r'^alias\b.*=', "don't use alias, use a function"),
    (r'if\s*!', "don't use '!' to negate exit status"),
    (r'/dev/u?random', "don't use entropy, use /dev/zero"),
    (r'do\s*true;\s*done', "don't use true as loop body, use sleep 0"),
    (r'^( *)\t', "don't use tabs to indent"),
  ],
  # warnings
  [
    (r'^function', "don't use 'function', use old style"),
    (r'^diff.*-\w*N', "don't use 'diff -N'"),
    (r'\$PWD|\${PWD}', "don't use $PWD, use `pwd`"),
    (r'^([^"\'\n]|("[^"\n]*")|(\'[^\'\n]*\'))*\^', "^ must be quoted"),
    (r'kill (`|\$\()', "don't use kill, use killdaemons.py")
  ]
]
87 87
# Filters for shell test scripts.  Each entry is a (regex, replacement
# function) pair that checkfile() applies with re.sub() before matching
# the check patterns: comments are masked by repcomment and here-document
# bodies by rephere.
testfilters = [
    (r"( *)(#([^\n]*\S)?)", repcomment),
    (r"<<(\S+)((.|\n)*?\n\1)", rephere),
]
92 92
# Checks for unified tests (.t files).
#
# utestpats[0] holds the error-level checks and utestpats[1] the
# warning-level ones.  Entries are (regex, message) or
# (regex, message, ignore-regex); checkfile() skips a hit when the
# ignore-regex matches the offending line.
#
# NOTE(review): confirm the number of leading spaces in uprefix and in the
# "^ ..." patterns below against the canonical copy of this file -- runs
# of spaces may have been collapsed in the copy this was taken from.
winglobmsg = "use (glob) to match Windows paths too"
uprefix = r"^ \$ "
utestpats = [
  [
    (r'^(\S.*|| [$>] .*)[ \t]\n', "trailing whitespace on non-output"),
    (uprefix + r'.*\|\s*sed[^|>\n]*\n',
     "use regex test output patterns instead of sed"),
    (uprefix + r'(true|exit 0)', "explicit zero exit unnecessary"),
    (uprefix + r'.*(?<!\[)\$\?', "explicit exit code checks unnecessary"),
    (uprefix + r'.*\|\| echo.*(fail|error)',
     "explicit exit code checks unnecessary"),
    (uprefix + r'set -e', "don't use set -e"),
    (uprefix + r'\s', "don't indent commands, use > for continued lines"),
    (r'^ saved backup bundle to \$TESTTMP.*\.hg$', winglobmsg),
    (r'^ changeset .* references (corrupted|missing) \$TESTTMP/.*[^)]$',
     winglobmsg),
    # The ignore pattern is a regex too, so it is spelled as a raw string
    # like every other pattern instead of relying on "\$" passing through
    # a normal string literal untouched.
    (r'^ pulling from \$TESTTMP/.*[^)]$', winglobmsg,
     r'\$TESTTMP/unix-repo$'),
  ],
  # warnings
  [
    (r'^ [^*?/\n]* \(glob\)$',
     "warning: glob match with no glob character (?*/)"),
  ]
]
117 117
# Reuse every shell-script check for unified tests: each pattern is
# wrapped so that it only applies to lines introduced by a "$" or ">"
# marker.  A pattern anchored with "^" stays anchored right after the
# marker; any other pattern may match anywhere later on the line.
for i in [0, 1]:
    for p, m in testpats[i]:
        if not p.startswith(r'^'):
            p = r"^ [$>] .*(%s)" % p
        else:
            p = r"^ [$>] (%s)" % p[1:]
        utestpats[i].append((p, m))
125 125
# Filters for unified tests, applied by checkfile() with re.sub() before
# the check patterns are matched.  Here-document bodies are masked first
# (the terminator is expected after a " > " continuation marker), then
# comments.
utestfilters = [
    (r"<<(\S+)((.|\n)*?\n > \1)", rephere),
    (r"( *)(#([^\n]*\S)?)", repcomment),
]
130 130
# Checks for Python source.
#
# pypats[0] holds the error-level checks and pypats[1] the warning-level
# ones (currently none).  Entries are (regex, message) or
# (regex, message, ignore-regex); checkfile() skips a hit when the
# ignore-regex matches the offending line.  The patterns run against the
# output of pyfilters, i.e. with comments and string contents masked.
pypats = [
  [
    (r'^\s*def\s*\w+\s*\(.*,\s*\(',
     "tuple parameter unpacking not available in Python 3+"),
    (r'lambda\s*\(.*,.*\)',
     "tuple parameter unpacking not available in Python 3+"),
    (r'(?<!def)\s+(cmp)\(', "cmp is not available in Python 3+"),
    (r'\breduce\s*\(.*', "reduce is not available in Python 3+"),
    (r'\.has_key\b', "dict.has_key is not available in Python 3+"),
    (r'\s<>\s', '<> operator is not available in Python 3+, use !='),
    (r'^\s*\t', "don't use tabs"),
    (r'\S;\s*\n', "semicolon"),
    (r'[^_]_\("[^"]+"\s*%', "don't use % inside _()"),
    (r"[^_]_\('[^']+'\s*%", "don't use % inside _()"),
    (r'(\w|\)),\w', "missing whitespace after ,"),
    (r'(\w|\))[+/*\-<>]\w', "missing whitespace in expression"),
    (r'^\s+(\w|\.)+=\w[^,()\n]*$', "missing whitespace in assignment"),
    (r'(\s+)try:\n((?:\n|\1\s.*\n)+?)\1except.*?:\n'
     r'((?:\n|\1\s.*\n)+?)\1finally:', 'no try/except/finally in Python 2.4'),
    (r'(\s+)try:\n((?:\n|\1\s.*\n)*?)\1\s*yield\b.*?'
     r'((?:\n|\1\s.*\n)+?)\1finally:',
     'no yield inside try/finally in Python 2.4'),
    (r'.{81}', "line too long"),
    (r' x+[xo][\'"]\n\s+[\'"]x', 'string join across lines with no space'),
    (r'[^\n]\Z', "no trailing newline"),
    (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
#    (r'^\s+[^_ \n][^_. \n]+_[^_\n]+\s*=',
#     "don't use underbars in identifiers"),
    # First character: letters or "_" only.  (A bare "A-z" range would
    # also take in the punctuation that sits between "Z" and "a".)
    (r'^\s+(self\.)?[A-Za-z_][a-z0-9]+[A-Z]\w* = ',
     "don't use camelcase in identifiers"),
    (r'^\s*(if|while|def|class|except|try)\s[^[\n]*:\s*[^\\n]#\s]+',
     "linebreak after :"),
    (r'class\s[^( \n]+:', "old-style class, use class foo(object)"),
    (r'class\s[^( \n]+\(\):',
     "class foo() not available in Python 2.4, use class foo(object)"),
    (r'\b(%s)\(' % '|'.join(keyword.kwlist),
     "Python keyword is not a function"),
    (r',]', "unneeded trailing ',' in list"),
#    (r'class\s[A-Z][^\(]*\((?!Exception)',
#     "don't capitalize non-exception classes"),
#    (r'in range\(', "use xrange"),
#    (r'^\s*print\s+', "avoid using print in core and extensions"),
    (r'[\x80-\xff]', "non-ASCII character literal"),
    # Either kind of closing quote directly followed by .format(
    (r'["\']\.format\(', "str.format() not available in Python 2.4"),
    (r'^\s*with\s+', "with not available in Python 2.4"),
    (r'\.isdisjoint\(', "set.isdisjoint not available in Python 2.4"),
    (r'^\s*except.* as .*:', "except as not available in Python 2.4"),
    (r'^\s*os\.path\.relpath', "relpath not available in Python 2.4"),
    (r'(?<!def)\s+(any|all|format)\(',
     "any/all/format not available in Python 2.4"),
    (r'(?<!def)\s+(callable)\(',
     "callable not available in Python 3, use getattr(f, '__call__', None)"),
    (r'if\s.*\selse', "if ... else form not available in Python 2.4"),
    (r'^\s*(%s)\s\s' % '|'.join(keyword.kwlist),
     "gratuitous whitespace after Python keyword"),
    (r'([\(\[][ \t]\S)|(\S[ \t][\)\]])', "gratuitous whitespace in () or []"),
#    (r'\s\s=', "gratuitous whitespace before ="),
    (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\S',
     "missing whitespace around operator"),
    (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\s',
     "missing whitespace around operator"),
    (r'\s(\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\S',
     "missing whitespace around operator"),
    (r'[^^+=*/!<>&| %-](\s=|=\s)[^= ]',
     "wrong whitespace around ="),
    (r'raise Exception', "don't raise generic exceptions"),
    (r'raise [^,(]+, (\([^\)]+\)|[^,\(\)]+)$',
     "don't use old-style two-argument raise, use Exception(message)"),
    (r' is\s+(not\s+)?["\'0-9-]', "object comparison with literal"),
    (r' [=!]=\s+(True|False|None)',
     "comparison with singleton, use 'is' or 'is not' instead"),
    (r'^\s*(while|if) [01]:',
     "use True/False for constant Boolean expression"),
    (r'(?:(?<!def)\s+|\()hasattr',
     'hasattr(foo, bar) is broken, use util.safehasattr(foo, bar) instead'),
    (r'opener\([^)]*\).read\(',
     "use opener.read() instead"),
    (r'BaseException', 'not in Python 2.4, use Exception'),
    (r'os\.path\.relpath', 'os.path.relpath is not in Python 2.5'),
    (r'opener\([^)]*\).write\(',
     "use opener.write() instead"),
    (r'[\s\(](open|file)\([^)]*\)\.read\(',
     "use util.readfile() instead"),
    (r'[\s\(](open|file)\([^)]*\)\.write\(',
     "use util.writefile() instead"),
    (r'^[\s\(]*(open(er)?|file)\([^)]*\)',
     "always assign an opened file to a variable, and close it afterwards"),
    (r'[\s\(](open|file)\([^)]*\)\.',
     "always assign an opened file to a variable, and close it afterwards"),
    (r'(?i)descendent', "the proper spelling is descendAnt"),
    (r'\.debug\(\_', "don't mark debug messages for translation"),
    (r'\.strip\(\)\.split\(\)', "no need to strip before splitting"),
    (r'^\s*except\s*:', "naked except clause", r'#.*re-raises'),
    # Any number of complete 4-space levels followed by 1-3 stray spaces.
    (r':\n(    )*( ){1,3}[^ ]', "must indent 4 spaces"),
    (r'ui\.(status|progress|write|note|warn)\([\'\"]x',
     "missing _() in ui message (use () to hide false-positives)"),
  ],
  # warnings
  [
  ]
]
232 232
# Filter for Python source, applied by checkfile() with re.sub() before
# the check patterns are matched.  The single verbose-mode regex matches
# either a "#" comment (group "comment") or a string literal delimited by
# a triple or single quote (groups "quote" and "text"); reppython masks
# whichever one matched.
pyfilters = [
    (r"""(?msx)(?P<comment>\#.*?$)|
         ((?P<quote>('''|\"\"\"|(?<!')'(?!')|(?<!")"(?!")))
          (?P<text>(([^\\]|\\.)*?))
          (?P=quote))""", reppython),
]
239 239
# Plain text files are checked as-is: no filters are applied.
txtfilters = []

# Checks for plain text files: error-level checks first, then the (empty)
# list of warning-level checks.
txtpats = [
  [
    # Raw string, like every other pattern in this file, so "\s" does not
    # depend on how a normal string literal treats unknown escapes.
    (r'\s$', 'trailing whitespace'),
  ],
  []
]
248
# Checks for C source.
#
# cpats[0] holds the error-level checks and cpats[1] the (empty) list of
# warning-level ones.  Each entry is a (regex, message) pair matched
# against the output of cfilters.
cpats = [
  [
    (r'//', "don't use //-style comments"),
    (r'^ ', "don't use spaces to indent"),
    (r'\S\t', "don't use tabs except for indent"),
    (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
    (r'.{81}', "line too long"),
    (r'(while|if|do|for)\(', "use space after while/if/do/for"),
    (r'return\(', "return is not a function"),
    (r' ;', "no space before ;"),
    (r'\w+\* \w+', "use int *foo, not int* foo"),
    (r'\([^\)]+\) \w+', "use (int)foo, not (int) foo"),
    (r'\w+ (\+\+|--)', "use foo++, not foo ++"),
    (r'\w,\w', "missing whitespace after ,"),
    (r'^[^#]\w[+/*]\w', "missing whitespace in expression"),
    (r'^#\s+\w', "use #foo, not # foo"),
    (r'[^\n]\Z', "no trailing newline"),
    (r'^\s*#import\b', "use only #include in standard C code"),
  ],
  # warnings
  []
]
262 271
# Filters for C source, applied in this order by checkfile() with
# re.sub() before the check patterns are matched: block comments,
# double-quoted strings, #include <...> targets, and parenthesized
# groups (see the rep* functions for what each replacement does).
cfilters = [
    (r'(/\*)(((\*(?!/))|[^*])*)\*/', repccomment),
    (r'''(?P<quote>(?<!")")(?P<text>([^"]|\\")+)"(?!")''', repquote),
    (r'''(#\s*include\s+<)([^>]+)>''', repinclude),
    (r'(\()([^)]+\))', repcallspaces),
]
269 278
# Layering check used for mercurial/util.py (see the "checks" table):
# flags any use of "ui.".  Same layout as the other pattern tables:
# error-level checks first, then warning-level ones.
inutilpats = [
  [
    (r'\bui\.', "don't use ui in util"),
  ],
  # warnings
  []
]

# Layering check used for mercurial/revlog.py (see the "checks" table):
# flags any use of "repo.".
inrevlogpats = [
  [
    (r'\brepo\.', "don't use repo in revlog"),
  ],
  # warnings
  []
]
285 294
# Table driving checkfile().  Each entry is
#   (name, filename regex, filters, patterns)
# The filename regex is tried with re.match(), i.e. anchored at the start
# of the path.  Every entry whose regex matches is applied, so a file can
# be covered by several entries (e.g. mercurial/util.py is checked both as
# 'python' and for the ui layering violation).
checks = [
    ('python', r'.*\.(py|cgi)$', pyfilters, pypats),
    ('test script', r'(.*/)?test-[^.~]*$', testfilters, testpats),
    ('c', r'.*\.c$', cfilters, cpats),
    ('unified test', r'.*\.t$', utestfilters, utestpats),
    ('layering violation repo in revlog', r'mercurial/revlog\.py', pyfilters,
     inrevlogpats),
    ('layering violation ui in util', r'mercurial/util\.py', pyfilters,
     inutilpats),
    ('txt', r'.*\.txt$', txtfilters, txtpats),
]
296 306
class norepeatlogger(object):
    """Error reporter that prints each offending line only once.

    The last (file, line number, line content) triple that was reported is
    remembered, so consecutive messages about the same line share a single
    header.
    """

    def __init__(self):
        self._lastseen = None

    def log(self, fname, lineno, line, msg, blame):
        """Print an error related to a given line of a given file.

        The location header and the faulty line are printed only when they
        differ from the previous call; the message is always printed.

        :fname: filename
        :lineno: line number
        :line: actual content of the line
        :msg: error message
        :blame: blame information shown in the header, omitted when empty
        """
        write = sys.stdout.write
        current = (fname, lineno, line)
        if current != self._lastseen:
            if blame:
                header = "%s:%d (%s):" % (fname, lineno, blame)
            else:
                header = "%s:%d:" % (fname, lineno)
            write(header + "\n")
            write(" > %s" % line + "\n")
            self._lastseen = current
        write(" " + msg + "\n")

# Shared logger instance used as the default reporter.
_defaultlogger = norepeatlogger()
323 333
def getblame(f):
    """Return per-line blame information for file ``f``.

    Runs ``hg annotate -un`` on the file and returns a list with one
    ``(line content, user, revision)`` tuple per output line, in file
    order.  The line content has its separating space and trailing newline
    removed.

    The command is started from an argument list rather than through a
    shell command string, so file names containing spaces or shell
    metacharacters are passed to hg unchanged.
    """
    import subprocess

    proc = subprocess.Popen(['hg', 'annotate', '-un', f],
                            stdout=subprocess.PIPE, universal_newlines=True)
    lines = []
    try:
        for l in proc.stdout:
            # each line looks like "<user> <rev>: <content>\n"
            start, line = l.split(':', 1)
            user, rev = start.split()
            lines.append((line[1:-1], user, rev))
    finally:
        # always release the pipe and reap the child process
        proc.stdout.close()
        proc.wait()
    return lines
331 341
def checkfile(f, logfunc=_defaultlogger.log, maxerr=None, warnings=False,
              blame=False, debug=False, lineno=True):
    """checks style and portability of a given file

    :f: filepath
    :logfunc: function used to report error
              logfunc(filename, linenumber, linecontent, errormessage, blame)
    :maxerr: number of errors to display, per matching check, before
             giving up on that check.  A false value (the default) reports
             all errors
    :warnings: also apply the warning-level patterns
    :blame: look up who last touched each offending line (runs hg annotate)
    :debug: print which checks are applied or skipped
    :lineno: pass real line numbers to logfunc; when false, False is
             passed instead

    Every entry of the "checks" table whose filename regex matches is
    applied.  A file containing the opt-out marker ("no-" directly
    followed by "check-code") is not checked any further, and a line
    containing "check-code" directly followed by "-ignore" is never
    reported.

    return True if no error is found, False otherwise.
    """
    blamecache = None
    result = True
    for name, match, filters, pats in checks:
        if debug:
            sys.stdout.write("%s %s" % (name, f) + "\n")
        fc = 0
        if not re.match(match, f):
            if debug:
                sys.stdout.write(
                    "Skipping %s for %s it doesn't match %s" % (
                        name, f, match) + "\n")
            continue
        fp = open(f)
        try:
            pre = post = fp.read()
        finally:
            fp.close()
        # the marker is assembled from two pieces so that this file does
        # not contain it and can still be checked itself
        if "no-" + "check-code" in pre:
            if debug:
                sys.stdout.write(
                    "Skipping %s for %s it has no- and check-code" % (
                        name, f) + "\n")
            break
        # "pre" keeps the original text for reporting; "post" is the
        # filtered text the patterns are matched against
        for p, r in filters:
            post = re.sub(p, r, post)
        if warnings:
            pats = pats[0] + pats[1]
        else:
            pats = pats[0]
        # sys.stdout.write(post) # uncomment to show filtered version

        if debug:
            sys.stdout.write("Checking %s for %s" % (name, f) + "\n")

        prelines = None
        errors = []
        for pat in pats:
            if len(pat) == 3:
                p, msg, ignore = pat
            else:
                p, msg = pat
                ignore = None

            # fix-up regexes for multi-line searches
            # \s doesn't match \n
            p = re.sub(r'(?<!\\)\\s', r'[ \\t]', p)
            # [^...] doesn't match newline
            p = re.sub(r'(?<!\\)\[\^', r'[^\\n', p)

            pos = 0
            n = 0
            for m in re.finditer(p, post, re.MULTILINE):
                if prelines is None:
                    prelines = pre.splitlines()
                    postlines = post.splitlines(True)

                # advance to the line containing the start of the match;
                # matches arrive in order, so pos/n only ever move forward
                start = m.start()
                while n < len(postlines):
                    step = len(postlines[n])
                    if pos + step > start:
                        break
                    pos += step
                    n += 1
                l = prelines[n]

                # marker assembled from two pieces, see above
                if "check-code" + "-ignore" in l:
                    if debug:
                        sys.stdout.write(
                            "Skipping %s for %s:%s (check-code -ignore)" % (
                                name, f, n) + "\n")
                    continue
                elif ignore and re.search(ignore, l, re.MULTILINE):
                    continue
                bd = ""
                if blame:
                    bd = 'working directory'
                    # run the annotate command at most once per file, even
                    # when it produced no output
                    if blamecache is None:
                        blamecache = getblame(f)
                    if n < len(blamecache):
                        bl, bu, br = blamecache[n]
                        # only trust the blame if the line is unmodified
                        if bl == l:
                            bd = '%s@%s' % (bu, br)
                errors.append((f, lineno and n + 1, l, msg, bd))
                result = False

        errors.sort()
        for e in errors:
            logfunc(*e)
            fc += 1
            if maxerr and fc >= maxerr:
                sys.stdout.write(" (too many errors, giving up)" + "\n")
                break

    return result
436 446
if __name__ == "__main__":
    # Command line driver: check the files named on the command line (or
    # everything in the current directory when none are given) and exit
    # with status 1 if any file has errors, 0 otherwise.
    parser = optparse.OptionParser("%prog [options] [files]")
    parser.set_defaults(per_file=15, warnings=False, blame=False, debug=False,
                        lineno=True)
    parser.add_option("-w", "--warnings", action="store_true",
                      help="include warning-level checks")
    parser.add_option("-p", "--per-file", type="int",
                      help="max warnings per file")
    parser.add_option("-b", "--blame", action="store_true",
                      help="use annotate to generate blame info")
    parser.add_option("", "--debug", action="store_true",
                      help="show debug information")
    parser.add_option("", "--nolineno", action="store_false",
                      dest='lineno', help="don't show line numbers")
    options, args = parser.parse_args()

    check = args or glob.glob("*")

    # every file is checked, even after an earlier one has failed
    ret = 0
    for f in check:
        clean = checkfile(f, maxerr=options.per_file,
                          warnings=options.warnings, blame=options.blame,
                          debug=options.debug, lineno=options.lineno)
        if not clean:
            ret = 1
    sys.exit(ret)
General Comments 0
You need to be logged in to leave comments. Login now