##// END OF EJS Templates
check-code: make it possible to ignore the PWD check in some situation...
marmoute -
r48793:b84fe613 stable
parent child Browse files
Show More
@@ -1,1126 +1,1126 b''
1 1 #!/usr/bin/env python3
2 2 #
3 3 # check-code - a style and portability checker for Mercurial
4 4 #
5 5 # Copyright 2010 Olivia Mackall <olivia@selenic.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """style and portability checker for Mercurial
11 11
12 12 when a rule triggers wrong, do one of the following (prefer one from top):
13 13 * do the work-around the rule suggests
14 14 * doublecheck that it is a false match
15 15 * improve the rule pattern
16 16 * add an ignore pattern to the rule (3rd arg) which matches your good line
17 17 (you can append a short comment and match this, like: #re-raises)
18 18 * change the pattern to a warning and list the exception in test-check-code-hg
19 19 * ONLY use no--check-code for skipping entire files from external sources
20 20 """
21 21
22 22 from __future__ import absolute_import, print_function
23 23 import glob
24 24 import keyword
25 25 import optparse
26 26 import os
27 27 import re
28 28 import sys
29 29
if sys.version_info[0] >= 3:

    def opentext(f):
        """Open ``f`` for reading as text, decoded as latin1."""
        return open(f, encoding='latin1')


else:
    # Python 2: the builtin open() is used unchanged
    opentext = open
36 36
37 37
38 38 try:
39 39 xrange
40 40 except NameError:
41 41 xrange = range
42 42 try:
43 43 import re2
44 44 except ImportError:
45 45 re2 = None
46 46
47 47 import testparseutil
48 48
49 49
def compilere(pat, multiline=False):
    """Compile ``pat``, preferring re2 over the standard ``re`` module.

    :pat: regular expression source
    :multiline: when true, the inline flag ``(?m)`` is prepended to ``pat``

    re2 is used when the module-level ``re2`` name is truthy and it accepts
    the pattern; if it raises ``re2.error``, or is not available, ``re``
    compiles the pattern instead.
    """
    source = '(?m)' + pat if multiline else pat
    if not re2:
        return re.compile(source)
    try:
        return re2.compile(source)
    except re2.error:
        # re2 rejected the pattern, fall back to the stdlib engine
        return re.compile(source)
59 59
60 60
61 61 # check "rules depending on implementation of repquote()" in each
62 62 # patterns (especially pypats), before changing around repquote()
# characters that have a dedicated placeholder in quoted text
_repquotefixedmap = {
    ' ': ' ',
    '\n': '\n',
    '.': 'p',
    ':': 'q',
    '%': '%',
    '\\': 'b',
    '*': 'A',
    '+': 'P',
    '-': 'M',
}


def _repquoteencodechr(i):
    """Return the one-character placeholder for character code ``i``.

    Codes above 255 give 'u'. Characters listed in ``_repquotefixedmap``
    give their mapped value. Any other character gives 'x' when
    alphabetic, 'n' when a digit and 'o' otherwise.
    """
    if i > 255:
        return 'u'
    char = chr(i)
    fixed = _repquotefixedmap.get(char)
    if fixed is not None:
        return fixed
    if char.isalpha():
        return 'x'
    return 'n' if char.isdigit() else 'o'


# translation table covering character codes 0..255, indexed by code
_repquotett = ''.join(map(_repquoteencodechr, range(256)))


def repquote(m):
    """Replace the content of a quoted string by placeholder characters.

    :m: match object providing the named groups 'quote' and 'text'

    returns the translated text wrapped in the original quote characters.
    """
    quote, text = m.group('quote', 'text')
    return quote + text.translate(_repquotett) + quote
96 96
97 97
def reppython(m):
    """Blank out a Python comment or quoted string matched by ``m``.

    When the 'comment' group matched, every character up to the end of
    the right-stripped comment is replaced by '#', and the trailing
    whitespace is kept as is. Otherwise the match is handed to repquote().
    """
    comment = m.group('comment')
    if not comment:
        return repquote(m)
    width = len(comment.rstrip())
    return "#" * width + comment[width:]
104 104
105 105
def repcomment(m):
    """Return group 1 followed by one '#' per character of group 2."""
    lead, comment = m.group(1, 2)
    return lead + "#" * len(comment)
108 108
109 109
def repccomment(m):
    """Mask the body of a C comment with 'x' characters.

    Every non-whitespace character of group 2, and every space directly
    following a newline, becomes 'x'. The result is group 1, the masked
    body and a closing "*/".
    """
    opener, body = m.group(1, 2)
    masked = re.sub(r"((?<=\n) )|\S", "x", body)
    return opener + masked + "*/"
113 113
114 114
def repcallspaces(m):
    """Drop the whitespace that follows each newline inside group 2.

    returns group 1 followed by the rewritten group 2.
    """
    head, rest = m.group(1, 2)
    return head + re.sub(r"\n\s+", "\n", rest)
118 118
119 119
def repinclude(m):
    """Return group 1 of ``m`` followed by the literal "<foo>"."""
    prefix = m.group(1)
    return prefix + "<foo>"
122 122
123 123
def rephere(m):
    """Mask group 2 of ``m`` by turning non-whitespace into 'x'.

    returns group 1 followed by the masked group 2; whitespace (including
    newlines) is left untouched.
    """
    tag, body = m.group(1, 2)
    return tag + re.sub(r"\S", "x", body)
127 127
128 128
129 129 testpats = [
130 130 [
131 131 (r'\b(push|pop)d\b', "don't use 'pushd' or 'popd', use 'cd'"),
132 132 (r'\W\$?\(\([^\)\n]*\)\)', "don't use (()) or $(()), use 'expr'"),
133 133 (r'grep.*-q', "don't use 'grep -q', redirect to /dev/null"),
134 134 (r'(?<!hg )grep.* -a', "don't use 'grep -a', use in-line python"),
135 135 (r'sed.*-i', "don't use 'sed -i', use a temporary file"),
136 136 (r'\becho\b.*\\n', "don't use 'echo \\n', use printf"),
137 137 (r'echo -n', "don't use 'echo -n', use printf"),
138 138 (r'(^|\|\s*)\bwc\b[^|]*$\n(?!.*\(re\))', "filter wc output"),
139 139 (r'head -c', "don't use 'head -c', use 'dd'"),
140 140 (r'tail -n', "don't use the '-n' option to tail, just use '-<num>'"),
141 141 (r'sha1sum', "don't use sha1sum, use $TESTDIR/md5sum.py"),
142 142 (r'\bls\b.*-\w*R', "don't use 'ls -R', use 'find'"),
143 143 (r'printf.*[^\\]\\([1-9]|0\d)', r"don't use 'printf \NNN', use Python"),
144 144 (r'printf.*[^\\]\\x', "don't use printf \\x, use Python"),
145 145 (r'rm -rf \*', "don't use naked rm -rf, target a directory"),
146 146 (
147 147 r'\[[^\]]+==',
148 148 '[ foo == bar ] is a bashism, use [ foo = bar ] instead',
149 149 ),
150 150 (
151 151 r'(^|\|\s*)grep (-\w\s+)*[^|]*[(|]\w',
152 152 "use egrep for extended grep syntax",
153 153 ),
154 154 (r'(^|\|\s*)e?grep .*\\S', "don't use \\S in regular expression"),
155 155 (r'(?<!!)/bin/', "don't use explicit paths for tools"),
156 156 (r'#!.*/bash', "don't use bash in shebang, use sh"),
157 157 (r'[^\n]\Z', "no trailing newline"),
158 158 (r'export .*=', "don't export and assign at once"),
159 159 (r'^source\b', "don't use 'source', use '.'"),
160 160 (r'touch -d', "don't use 'touch -d', use 'touch -t' instead"),
161 161 (r'\bls +[^|\n-]+ +-', "options to 'ls' must come before filenames"),
162 162 (r'[^>\n]>\s*\$HGRCPATH', "don't overwrite $HGRCPATH, append to it"),
163 163 (r'^stop\(\)', "don't use 'stop' as a shell function name"),
164 164 (r'(\[|\btest\b).*-e ', "don't use 'test -e', use 'test -f'"),
165 165 (r'\[\[\s+[^\]]*\]\]', "don't use '[[ ]]', use '[ ]'"),
166 166 (r'^alias\b.*=', "don't use alias, use a function"),
167 167 (r'if\s*!', "don't use '!' to negate exit status"),
168 168 (r'/dev/u?random', "don't use entropy, use /dev/zero"),
169 169 (r'do\s*true;\s*done', "don't use true as loop body, use sleep 0"),
170 170 (
171 171 r'sed (-e )?\'(\d+|/[^/]*/)i(?!\\\n)',
172 172 "put a backslash-escaped newline after sed 'i' command",
173 173 ),
174 174 (r'^diff *-\w*[uU].*$\n(^ \$ |^$)', "prefix diff -u/-U with cmp"),
175 175 (r'^\s+(if)? diff *-\w*[uU]', "prefix diff -u/-U with cmp"),
176 176 (r'[\s="`\']python\s(?!bindings)', "don't use 'python', use '$PYTHON'"),
177 177 (r'seq ', "don't use 'seq', use $TESTDIR/seq.py"),
178 178 (r'\butil\.Abort\b', "directly use error.Abort"),
179 179 (r'\|&', "don't use |&, use 2>&1"),
180 180 (r'\w = +\w', "only one space after = allowed"),
181 181 (
182 182 r'\bsed\b.*[^\\]\\n',
183 183 "don't use 'sed ... \\n', use a \\ and a newline",
184 184 ),
185 185 (r'env.*-u', "don't use 'env -u VAR', use 'unset VAR'"),
186 186 (r'cp.* -r ', "don't use 'cp -r', use 'cp -R'"),
187 187 (r'grep.* -[ABC]', "don't use grep's context flags"),
188 188 (
189 189 r'find.*-printf',
190 190 "don't use 'find -printf', it doesn't exist on BSD find(1)",
191 191 ),
192 192 (r'\$RANDOM ', "don't use bash-only $RANDOM to generate random values"),
193 193 ],
194 194 # warnings
195 195 [
196 196 (r'^function', "don't use 'function', use old style"),
197 197 (r'^diff.*-\w*N', "don't use 'diff -N'"),
198 (r'\$PWD|\${PWD}', "don't use $PWD, use `pwd`"),
198 (r'\$PWD|\${PWD}', "don't use $PWD, use `pwd`", "no-pwd-check"),
199 199 (r'^([^"\'\n]|("[^"\n]*")|(\'[^\'\n]*\'))*\^', "^ must be quoted"),
200 200 (r'kill (`|\$\()', "don't use kill, use killdaemons.py"),
201 201 ],
202 202 ]
203 203
204 204 testfilters = [
205 205 (r"( *)(#([^!][^\n]*\S)?)", repcomment),
206 206 (r"<<(\S+)((.|\n)*?\n\1)", rephere),
207 207 ]
208 208
209 209 uprefix = r"^ \$ "
210 210 utestpats = [
211 211 [
212 212 (r'^(\S.*|| [$>] \S.*)[ \t]\n', "trailing whitespace on non-output"),
213 213 (
214 214 uprefix + r'.*\|\s*sed[^|>\n]*\n',
215 215 "use regex test output patterns instead of sed",
216 216 ),
217 217 (uprefix + r'(true|exit 0)', "explicit zero exit unnecessary"),
218 218 (
219 219 uprefix + r'.*\|\| echo.*(fail|error)',
220 220 "explicit exit code checks unnecessary",
221 221 ),
222 222 (uprefix + r'set -e', "don't use set -e"),
223 223 (uprefix + r'(\s|fi\b|done\b)', "use > for continued lines"),
224 224 (
225 225 uprefix + r'.*:\.\S*/',
226 226 "x:.y in a path does not work on msys, rewrite "
227 227 "as x://.y, or see `hg log -k msys` for alternatives",
228 228 r'-\S+:\.|' '# no-msys', # -Rxxx
229 229 ), # in test-pull.t which is skipped on windows
230 230 (
231 231 r'^ [^$>].*27\.0\.0\.1',
232 232 'use $LOCALIP not an explicit loopback address',
233 233 ),
234 234 (
235 235 r'^ (?![>$] ).*\$LOCALIP.*[^)]$',
236 236 'mark $LOCALIP output lines with (glob) to help tests in BSD jails',
237 237 ),
238 238 (
239 239 r'^ (cat|find): .*: \$ENOENT\$',
240 240 'use test -f to test for file existence',
241 241 ),
242 242 (
243 243 r'^ diff -[^ -]*p',
244 244 "don't use (external) diff with -p for portability",
245 245 ),
246 246 (r' readlink ', 'use readlink.py instead of readlink'),
247 247 (
248 248 r'^ [-+][-+][-+] .* [-+]0000 \(glob\)',
249 249 "glob timezone field in diff output for portability",
250 250 ),
251 251 (
252 252 r'^ @@ -[0-9]+ [+][0-9]+,[0-9]+ @@',
253 253 "use '@@ -N* +N,n @@ (glob)' style chunk header for portability",
254 254 ),
255 255 (
256 256 r'^ @@ -[0-9]+,[0-9]+ [+][0-9]+ @@',
257 257 "use '@@ -N,n +N* @@ (glob)' style chunk header for portability",
258 258 ),
259 259 (
260 260 r'^ @@ -[0-9]+ [+][0-9]+ @@',
261 261 "use '@@ -N* +N* @@ (glob)' style chunk header for portability",
262 262 ),
263 263 (
264 264 uprefix + r'hg( +-[^ ]+( +[^ ]+)?)* +extdiff'
265 265 r'( +(-[^ po-]+|--(?!program|option)[^ ]+|[^-][^ ]*))*$',
266 266 "use $RUNTESTDIR/pdiff via extdiff (or -o/-p for false-positives)",
267 267 ),
268 268 ],
269 269 # warnings
270 270 [
271 271 (
272 272 r'^ (?!.*\$LOCALIP)[^*?/\n]* \(glob\)$',
273 273 "glob match with no glob string (?, *, /, and $LOCALIP)",
274 274 ),
275 275 ],
276 276 ]
277 277
278 278 # transform plain test rules to unified test's
279 279 for i in [0, 1]:
280 280 for tp in testpats[i]:
281 281 p = tp[0]
282 282 m = tp[1]
283 283 if p.startswith('^'):
284 284 p = "^ [$>] (%s)" % p[1:]
285 285 else:
286 286 p = "^ [$>] .*(%s)" % p
287 287 utestpats[i].append((p, m) + tp[2:])
288 288
289 289 # don't transform the following rules:
290 290 # " > \t" and " \t" should be allowed in unified tests
291 291 testpats[0].append((r'^( *)\t', "don't use tabs to indent"))
292 292 utestpats[0].append((r'^( ?)\t', "don't use tabs to indent"))
293 293
294 294 utestfilters = [
295 295 (r"<<(\S+)((.|\n)*?\n > \1)", rephere),
296 296 (r"( +)(#([^!][^\n]*\S)?)", repcomment),
297 297 ]
298 298
299 299 # common patterns to check *.py
300 300 commonpypats = [
301 301 [
302 302 (r'\\$', 'Use () to wrap long lines in Python, not \\'),
303 303 (
304 304 r'^\s*def\s*\w+\s*\(.*,\s*\(',
305 305 "tuple parameter unpacking not available in Python 3+",
306 306 ),
307 307 (
308 308 r'lambda\s*\(.*,.*\)',
309 309 "tuple parameter unpacking not available in Python 3+",
310 310 ),
311 311 (r'(?<!def)\s+(cmp)\(', "cmp is not available in Python 3+"),
312 312 (r'(?<!\.)\breduce\s*\(.*', "reduce is not available in Python 3+"),
313 313 (
314 314 r'\bdict\(.*=',
315 315 'dict() is different in Py2 and 3 and is slower than {}',
316 316 'dict-from-generator',
317 317 ),
318 318 (r'\.has_key\b', "dict.has_key is not available in Python 3+"),
319 319 (r'\s<>\s', '<> operator is not available in Python 3+, use !='),
320 320 (r'^\s*\t', "don't use tabs"),
321 321 (r'\S;\s*\n', "semicolon"),
322 322 (r'[^_]_\([ \t\n]*(?:"[^"]+"[ \t\n+]*)+%', "don't use % inside _()"),
323 323 (r"[^_]_\([ \t\n]*(?:'[^']+'[ \t\n+]*)+%", "don't use % inside _()"),
324 324 (r'(\w|\)),\w', "missing whitespace after ,"),
325 325 (r'(\w|\))[+/*\-<>]\w', "missing whitespace in expression"),
326 326 (r'\w\s=\s\s+\w', "gratuitous whitespace after ="),
327 327 (
328 328 (
329 329 # a line ending with a colon, potentially with trailing comments
330 330 r':([ \t]*#[^\n]*)?\n'
331 331 # one that is not a pass and not only a comment
332 332 r'(?P<indent>[ \t]+)[^#][^\n]+\n'
333 333 # more lines at the same indent level
334 334 r'((?P=indent)[^\n]+\n)*'
335 335 # a pass at the same indent level, which is bogus
336 336 r'(?P=indent)pass[ \t\n#]'
337 337 ),
338 338 'omit superfluous pass',
339 339 ),
340 340 (r'[^\n]\Z', "no trailing newline"),
341 341 (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
342 342 (
343 343 r'^\s*(if|while|def|class|except|try)\s[^[\n]*:\s*[^\\n]#\s]+',
344 344 "linebreak after :",
345 345 ),
346 346 (
347 347 r'class\s[^( \n]+:',
348 348 "old-style class, use class foo(object)",
349 349 r'#.*old-style',
350 350 ),
351 351 (
352 352 r'class\s[^( \n]+\(\):',
353 353 "class foo() creates old style object, use class foo(object)",
354 354 r'#.*old-style',
355 355 ),
356 356 (
357 357 r'\b(%s)\('
358 358 % '|'.join(k for k in keyword.kwlist if k not in ('print', 'exec')),
359 359 "Python keyword is not a function",
360 360 ),
361 361 # (r'class\s[A-Z][^\(]*\((?!Exception)',
362 362 # "don't capitalize non-exception classes"),
363 363 # (r'in range\(', "use xrange"),
364 364 # (r'^\s*print\s+', "avoid using print in core and extensions"),
365 365 (r'[\x80-\xff]', "non-ASCII character literal"),
366 366 (r'("\')\.format\(', "str.format() has no bytes counterpart, use %"),
367 367 (
368 368 r'([\(\[][ \t]\S)|(\S[ \t][\)\]])',
369 369 "gratuitous whitespace in () or []",
370 370 ),
371 371 # (r'\s\s=', "gratuitous whitespace before ="),
372 372 (
373 373 r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\S',
374 374 "missing whitespace around operator",
375 375 ),
376 376 (
377 377 r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\s',
378 378 "missing whitespace around operator",
379 379 ),
380 380 (
381 381 r'\s(\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\S',
382 382 "missing whitespace around operator",
383 383 ),
384 384 (r'[^^+=*/!<>&| %-](\s=|=\s)[^= ]', "wrong whitespace around ="),
385 385 (
386 386 r'\([^()]*( =[^=]|[^<>!=]= )',
387 387 "no whitespace around = for named parameters",
388 388 ),
389 389 (
390 390 r'raise [^,(]+, (\([^\)]+\)|[^,\(\)]+)$',
391 391 "don't use old-style two-argument raise, use Exception(message)",
392 392 ),
393 393 (r' is\s+(not\s+)?["\'0-9-]', "object comparison with literal"),
394 394 (
395 395 r' [=!]=\s+(True|False|None)',
396 396 "comparison with singleton, use 'is' or 'is not' instead",
397 397 ),
398 398 (
399 399 r'^\s*(while|if) [01]:',
400 400 "use True/False for constant Boolean expression",
401 401 ),
402 402 (r'^\s*if False(:| +and)', 'Remove code instead of using `if False`'),
403 403 (
404 404 r'(?:(?<!def)\s+|\()hasattr\(',
405 405 'hasattr(foo, bar) is broken on py2, use util.safehasattr(foo, bar) '
406 406 'instead',
407 407 r'#.*hasattr-py3-only',
408 408 ),
409 409 (r'opener\([^)]*\).read\(', "use opener.read() instead"),
410 410 (r'opener\([^)]*\).write\(', "use opener.write() instead"),
411 411 (r'(?i)descend[e]nt', "the proper spelling is descendAnt"),
412 412 (r'\.debug\(\_', "don't mark debug messages for translation"),
413 413 (r'\.strip\(\)\.split\(\)', "no need to strip before splitting"),
414 414 (r'^\s*except\s*:', "naked except clause", r'#.*re-raises'),
415 415 (
416 416 r'^\s*except\s([^\(,]+|\([^\)]+\))\s*,',
417 417 'legacy exception syntax; use "as" instead of ","',
418 418 ),
419 419 (r'release\(.*wlock, .*lock\)', "wrong lock release order"),
420 420 (r'\bdef\s+__bool__\b', "__bool__ should be __nonzero__ in Python 2"),
421 421 (
422 422 r'os\.path\.join\(.*, *(""|\'\')\)',
423 423 "use pathutil.normasprefix(path) instead of os.path.join(path, '')",
424 424 ),
425 425 (r'\s0[0-7]+\b', 'legacy octal syntax; use "0o" prefix instead of "0"'),
426 426 # XXX only catch mutable arguments on the first line of the definition
427 427 (r'def.*[( ]\w+=\{\}', "don't use mutable default arguments"),
428 428 (r'\butil\.Abort\b', "directly use error.Abort"),
429 429 (
430 430 r'^@(\w*\.)?cachefunc',
431 431 "module-level @cachefunc is risky, please avoid",
432 432 ),
433 433 (
434 434 r'^import Queue',
435 435 "don't use Queue, use pycompat.queue.Queue + "
436 436 "pycompat.queue.Empty",
437 437 ),
438 438 (
439 439 r'^import cStringIO',
440 440 "don't use cStringIO.StringIO, use util.stringio",
441 441 ),
442 442 (r'^import urllib', "don't use urllib, use util.urlreq/util.urlerr"),
443 443 (
444 444 r'^import SocketServer',
445 445 "don't use SockerServer, use util.socketserver",
446 446 ),
447 447 (r'^import urlparse', "don't use urlparse, use util.urlreq"),
448 448 (r'^import xmlrpclib', "don't use xmlrpclib, use util.xmlrpclib"),
449 449 (r'^import cPickle', "don't use cPickle, use util.pickle"),
450 450 (r'^import pickle', "don't use pickle, use util.pickle"),
451 451 (r'^import httplib', "don't use httplib, use util.httplib"),
452 452 (r'^import BaseHTTPServer', "use util.httpserver instead"),
453 453 (
454 454 r'^(from|import) mercurial\.(cext|pure|cffi)',
455 455 "use mercurial.policy.importmod instead",
456 456 ),
457 457 (r'\.next\(\)', "don't use .next(), use next(...)"),
458 458 (
459 459 r'([a-z]*).revision\(\1\.node\(',
460 460 "don't convert rev to node before passing to revision(nodeorrev)",
461 461 ),
462 462 (r'platform\.system\(\)', "don't use platform.system(), use pycompat"),
463 463 ],
464 464 # warnings
465 465 [],
466 466 ]
467 467
468 468 # patterns to check normal *.py files
469 469 pypats = [
470 470 [
471 471 # Ideally, these should be placed in "commonpypats" for
472 472 # consistency of coding rules in Mercurial source tree.
473 473 # But on the other hand, these are not so seriously required for
474 474 # python code fragments embedded in test scripts. Fixing test
475 475 # scripts for these patterns requires many changes, and has less
476 476 # profit than effort.
477 477 (r'raise Exception', "don't raise generic exceptions"),
478 478 (r'[\s\(](open|file)\([^)]*\)\.read\(', "use util.readfile() instead"),
479 479 (
480 480 r'[\s\(](open|file)\([^)]*\)\.write\(',
481 481 "use util.writefile() instead",
482 482 ),
483 483 (
484 484 r'^[\s\(]*(open(er)?|file)\([^)]*\)(?!\.close\(\))',
485 485 "always assign an opened file to a variable, and close it afterwards",
486 486 ),
487 487 (
488 488 r'[\s\(](open|file)\([^)]*\)\.(?!close\(\))',
489 489 "always assign an opened file to a variable, and close it afterwards",
490 490 ),
491 491 (r':\n( )*( ){1,3}[^ ]', "must indent 4 spaces"),
492 492 (r'^import atexit', "don't use atexit, use ui.atexit"),
493 493 # rules depending on implementation of repquote()
494 494 (
495 495 r' x+[xpqo%APM][\'"]\n\s+[\'"]x',
496 496 'string join across lines with no space',
497 497 ),
498 498 (
499 499 r'''(?x)ui\.(status|progress|write|note|warn)\(
500 500 [ \t\n#]*
501 501 (?# any strings/comments might precede a string, which
502 502 # contains translatable message)
503 503 b?((['"]|\'\'\'|""")[ \npq%bAPMxno]*(['"]|\'\'\'|""")[ \t\n#]+)*
504 504 (?# sequence consisting of below might precede translatable message
505 505 # - formatting string: "% 10s", "%05d", "% -3.2f", "%*s", "%%" ...
506 506 # - escaped character: "\\", "\n", "\0" ...
507 507 # - character other than '%', 'b' as '\', and 'x' as alphabet)
508 508 (['"]|\'\'\'|""")
509 509 ((%([ n]?[PM]?([np]+|A))?x)|%%|b[bnx]|[ \nnpqAPMo])*x
510 510 (?# this regexp can't use [^...] style,
511 511 # because _preparepats forcibly adds "\n" into [^...],
512 512 # even though this regexp wants match it against "\n")''',
513 513 "missing _() in ui message (use () to hide false-positives)",
514 514 ),
515 515 ]
516 516 + commonpypats[0],
517 517 # warnings
518 518 [
519 519 # rules depending on implementation of repquote()
520 520 (r'(^| )pp +xxxxqq[ \n][^\n]', "add two newlines after '.. note::'"),
521 521 ]
522 522 + commonpypats[1],
523 523 ]
524 524
525 525 # patterns to check *.py for embedded ones in test script
526 526 embeddedpypats = [
527 527 [] + commonpypats[0],
528 528 # warnings
529 529 [] + commonpypats[1],
530 530 ]
531 531
532 532 # common filters to convert *.py
533 533 commonpyfilters = [
534 534 (
535 535 r"""(?msx)(?P<comment>\#.*?$)|
536 536 ((?P<quote>('''|\"\"\"|(?<!')'(?!')|(?<!")"(?!")))
537 537 (?P<text>(([^\\]|\\.)*?))
538 538 (?P=quote))""",
539 539 reppython,
540 540 ),
541 541 ]
542 542
543 543 # pattern only for mercurial and extensions
544 544 core_py_pats = [
545 545 [
546 546 # Windows tend to get confused about capitalization of the drive letter
547 547 #
548 548 # see mercurial.windows.abspath for details
549 549 (
550 550 r'os\.path\.abspath',
551 551 "use util.abspath instead (windows)",
552 552 r'#.*re-exports',
553 553 ),
554 554 ],
555 555 # warnings
556 556 [],
557 557 ]
558 558
559 559 # filters to convert normal *.py files
560 560 pyfilters = [] + commonpyfilters
561 561
562 562 # non-filter patterns
563 563 pynfpats = [
564 564 [
565 565 (r'pycompat\.osname\s*[=!]=\s*[\'"]nt[\'"]', "use pycompat.iswindows"),
566 566 (r'pycompat\.osname\s*[=!]=\s*[\'"]posix[\'"]', "use pycompat.isposix"),
567 567 (
568 568 r'pycompat\.sysplatform\s*[!=]=\s*[\'"]darwin[\'"]',
569 569 "use pycompat.isdarwin",
570 570 ),
571 571 ],
572 572 # warnings
573 573 [],
574 574 ]
575 575
576 576 # filters to convert *.py for embedded ones in test script
577 577 embeddedpyfilters = [] + commonpyfilters
578 578
579 579 # extension non-filter patterns
580 580 pyextnfpats = [
581 581 [(r'^"""\n?[A-Z]', "don't capitalize docstring title")],
582 582 # warnings
583 583 [],
584 584 ]
585 585
586 586 txtfilters = []
587 587
588 588 txtpats = [
589 589 [
590 590 (r'\s$', 'trailing whitespace'),
591 591 ('.. note::[ \n][^\n]', 'add two newlines after note::'),
592 592 ],
593 593 [],
594 594 ]
595 595
596 596 cpats = [
597 597 [
598 598 (r'//', "don't use //-style comments"),
599 599 (r'\S\t', "don't use tabs except for indent"),
600 600 (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
601 601 (r'(while|if|do|for)\(', "use space after while/if/do/for"),
602 602 (r'return\(', "return is not a function"),
603 603 (r' ;', "no space before ;"),
604 604 (r'[^;] \)', "no space before )"),
605 605 (r'[)][{]', "space between ) and {"),
606 606 (r'\w+\* \w+', "use int *foo, not int* foo"),
607 607 (r'\W\([^\)]+\) \w+', "use (int)foo, not (int) foo"),
608 608 (r'\w+ (\+\+|--)', "use foo++, not foo ++"),
609 609 (r'\w,\w', "missing whitespace after ,"),
610 610 (r'^[^#]\w[+/*]\w', "missing whitespace in expression"),
611 611 (r'\w\s=\s\s+\w', "gratuitous whitespace after ="),
612 612 (r'^#\s+\w', "use #foo, not # foo"),
613 613 (r'[^\n]\Z', "no trailing newline"),
614 614 (r'^\s*#import\b', "use only #include in standard C code"),
615 615 (r'strcpy\(', "don't use strcpy, use strlcpy or memcpy"),
616 616 (r'strcat\(', "don't use strcat"),
617 617 # rules depending on implementation of repquote()
618 618 ],
619 619 # warnings
620 620 [
621 621 # rules depending on implementation of repquote()
622 622 ],
623 623 ]
624 624
625 625 cfilters = [
626 626 (r'(/\*)(((\*(?!/))|[^*])*)\*/', repccomment),
627 627 (r'''(?P<quote>(?<!")")(?P<text>([^"]|\\")+)"(?!")''', repquote),
628 628 (r'''(#\s*include\s+<)([^>]+)>''', repinclude),
629 629 (r'(\()([^)]+\))', repcallspaces),
630 630 ]
631 631
632 632 inutilpats = [
633 633 [
634 634 (r'\bui\.', "don't use ui in util"),
635 635 ],
636 636 # warnings
637 637 [],
638 638 ]
639 639
640 640 inrevlogpats = [
641 641 [
642 642 (r'\brepo\.', "don't use repo in revlog"),
643 643 ],
644 644 # warnings
645 645 [],
646 646 ]
647 647
648 648 webtemplatefilters = []
649 649
650 650 webtemplatepats = [
651 651 [],
652 652 [
653 653 (
654 654 r'{desc(\|(?!websub|firstline)[^\|]*)+}',
655 655 'follow desc keyword with either firstline or websub',
656 656 ),
657 657 ],
658 658 ]
659 659
660 660 allfilesfilters = []
661 661
662 662 allfilespats = [
663 663 [
664 664 (
665 665 r'(http|https)://[a-zA-Z0-9./]*selenic.com/',
666 666 'use mercurial-scm.org domain URL',
667 667 ),
668 668 (
669 669 r'mercurial@selenic\.com',
670 670 'use mercurial-scm.org domain for mercurial ML address',
671 671 ),
672 672 (
673 673 r'mercurial-devel@selenic\.com',
674 674 'use mercurial-scm.org domain for mercurial-devel ML address',
675 675 ),
676 676 ],
677 677 # warnings
678 678 [],
679 679 ]
680 680
681 681 py3pats = [
682 682 [
683 683 (
684 684 r'os\.environ',
685 685 "use encoding.environ instead (py3)",
686 686 r'#.*re-exports',
687 687 ),
688 688 (r'os\.name', "use pycompat.osname instead (py3)"),
689 689 (r'os\.getcwd', "use encoding.getcwd instead (py3)", r'#.*re-exports'),
690 690 (r'os\.sep', "use pycompat.ossep instead (py3)"),
691 691 (r'os\.pathsep', "use pycompat.ospathsep instead (py3)"),
692 692 (r'os\.altsep', "use pycompat.osaltsep instead (py3)"),
693 693 (r'sys\.platform', "use pycompat.sysplatform instead (py3)"),
694 694 (r'getopt\.getopt', "use pycompat.getoptb instead (py3)"),
695 695 (r'os\.getenv', "use encoding.environ.get instead"),
696 696 (r'os\.setenv', "modifying the environ dict is not preferred"),
697 697 (r'(?<!pycompat\.)xrange', "use pycompat.xrange instead (py3)"),
698 698 ],
699 699 # warnings
700 700 [],
701 701 ]
702 702
703 703 checks = [
704 704 ('python', r'.*\.(py|cgi)$', r'^#!.*python', pyfilters, pypats),
705 705 ('python', r'.*\.(py|cgi)$', r'^#!.*python', [], pynfpats),
706 706 ('python', r'.*hgext.*\.py$', '', [], pyextnfpats),
707 707 (
708 708 'python 3',
709 709 r'.*(hgext|mercurial)/(?!demandimport|policy|pycompat).*\.py',
710 710 '',
711 711 pyfilters,
712 712 py3pats,
713 713 ),
714 714 (
715 715 'core files',
716 716 r'.*(hgext|mercurial)/(?!demandimport|policy|pycompat).*\.py',
717 717 '',
718 718 pyfilters,
719 719 core_py_pats,
720 720 ),
721 721 ('test script', r'(.*/)?test-[^.~]*$', '', testfilters, testpats),
722 722 ('c', r'.*\.[ch]$', '', cfilters, cpats),
723 723 ('unified test', r'.*\.t$', '', utestfilters, utestpats),
724 724 (
725 725 'layering violation repo in revlog',
726 726 r'mercurial/revlog\.py',
727 727 '',
728 728 pyfilters,
729 729 inrevlogpats,
730 730 ),
731 731 (
732 732 'layering violation ui in util',
733 733 r'mercurial/util\.py',
734 734 '',
735 735 pyfilters,
736 736 inutilpats,
737 737 ),
738 738 ('txt', r'.*\.txt$', '', txtfilters, txtpats),
739 739 (
740 740 'web template',
741 741 r'mercurial/templates/.*\.tmpl',
742 742 '',
743 743 webtemplatefilters,
744 744 webtemplatepats,
745 745 ),
746 746 ('all except for .po', r'.*(?<!\.po)$', '', allfilesfilters, allfilespats),
747 747 ]
748 748
749 749 # (desc,
750 750 # func to pick up embedded code fragments,
751 751 # list of patterns to convert target files
752 752 # list of patterns to detect errors/warnings)
753 753 embeddedchecks = [
754 754 (
755 755 'embedded python',
756 756 testparseutil.pyembedded,
757 757 embeddedpyfilters,
758 758 embeddedpypats,
759 759 )
760 760 ]
761 761
762 762
def _preparepats():
    """Compile, in place, every pattern and filter of the check tables.

    For each entry of ``checks`` and ``embeddedchecks``, the last item
    (lists of error and warning rules) and the one before it (list of
    filters) are rewritten so that their first element is a compiled
    regular expression instead of its source.
    """
    # fix-ups applied, in this order, to make regexes suit multi-line
    # searches
    fixups = (
        # \s doesn't match \n (done in two steps)
        # first, we replace \s that appears in a set already
        (r'\[\\s', r'[ \\t'),
        # now we replace other \s instances.
        (r'(?<!(\\|\[))\\s', r'[ \\t]'),
        # [^...] doesn't match newline
        (r'(?<!\\)\[\^', r'[^\\n'),
    )

    def compilerule(rule):
        source = rule[0]
        for search, replacement in fixups:
            source = re.sub(search, replacement, source)
        return (re.compile(source, re.MULTILINE),) + rule[1:]

    def compilefilter(flt):
        return re.compile(flt[0]), flt[1]

    for table in (checks, embeddedchecks):
        for entry in table:
            for rules in entry[-1]:
                # slice assignment keeps the list objects shared by the
                # tables
                rules[:] = [compilerule(rule) for rule in rules]

            filters = entry[-2]
            filters[:] = [compilefilter(flt) for flt in filters]
790 790
791 791
class norepeatlogger(object):
    """Error reporter that shows a faulty line once for all its messages."""

    def __init__(self):
        # (fname, lineno, line) of the last line that was printed
        self._lastseen = None

    def log(self, fname, lineno, line, msg, blame):
        """print an error related to a given line of a given file.

        The location and content of the faulty line are printed only
        when they differ from those of the previous call, so several
        consecutive errors on one line share a single header.

        :fname: filename
        :lineno: line number
        :line: actual content of the line
        :msg: error message
        :blame: when true, it is shown in parentheses after the location
        """
        key = (fname, lineno, line)
        if self._lastseen != key:
            if blame:
                header = "%s:%d (%s):" % (fname, lineno, blame)
            else:
                header = "%s:%d:" % (fname, lineno)
            print(header)
            print(" > %s" % line)
            self._lastseen = key
        print(" " + msg)


_defaultlogger = norepeatlogger()
819 819
820 820
def getblame(f):
    """Return per-line blame data for ``f`` from ``hg annotate -un``.

    Each line of the command output is split at its first ':' into the
    annotation prefix, which must consist of exactly two
    whitespace-separated fields (user and revision), and the line content.

    :f: filepath, interpolated as is into the command line
        NOTE(review): the path is not shell-quoted - confirm callers never
        pass names containing blanks or shell metacharacters

    returns a list of (line, user, rev) tuples in output order. ``line``
    has its first and last characters removed (presumably the blank after
    ':' and the line terminator).
    """
    lines = []
    # the context manager closes the pipe once its output is consumed
    with os.popen('hg annotate -un %s' % f) as annotated:
        for l in annotated:
            start, line = l.split(':', 1)
            user, rev = start.split()
            lines.append((line[1:-1], user, rev))
    return lines
828 828
829 829
def checkfile(
    f,
    logfunc=_defaultlogger.log,
    maxerr=None,
    warnings=False,
    blame=False,
    debug=False,
    lineno=True,
):
    """checks style and portability of a given file

    :f: filepath
    :logfunc: function used to report error
              logfunc(filename, linenumber, linecontent, errormessage)
    :maxerr: number of error to display before aborting.
             Set to a false value (default: None) to report all errors
    :warnings: whether warning level checks should be applied
    :blame: whether blame information should be displayed at error reporting
    :debug: whether debug information should be displayed
    :lineno: whether lineno should be displayed at error reporting

    return True if no error is found, False otherwise. True is also
    returned when the file cannot be read. The string "Skip" is returned
    when a check applies to the file but its content carries the marker
    that disables checking.
    """
    result = True

    try:
        with opentext(f) as fp:
            try:
                pre = fp.read()
            except UnicodeDecodeError as e:
                print("%s while reading %s" % (e, f))
                return result
    except IOError as e:
        print("Skipping %s, %s" % (f, str(e).split(':', 1)[0]))
        return result

    # context information shared while single checkfile() invocation
    context = {'blamecache': None}

    for name, match, magic, filters, pats in checks:
        if debug:
            print(name, f)
        if not (re.match(match, f) or (magic and re.search(magic, pre))):
            if debug:
                # report the check, then the file, then the pattern the
                # file name failed to match
                print(
                    "Skipping %s for %s it doesn't match %s" % (name, f, match)
                )
            continue
        if "no-" "check-code" in pre:
            # If you're looking at this line, it's because a file has:
            # no- check- code
            # but the reason to output skipping is to make life for
            # tests easier. So, instead of writing it with a normal
            # spelling, we write it with the expected spelling from
            # tests/test-check-code.t
            print("Skipping %s it has no-che?k-code (glob)" % f)
            return "Skip"  # skip checking this file

        fc = _checkfiledata(
            name,
            f,
            pre,
            filters,
            pats,
            context,
            logfunc,
            maxerr,
            warnings,
            blame,
            debug,
            lineno,
        )
        if fc:
            result = False

    if f.endswith('.t') and "no-" "check-code" not in pre:
        if debug:
            print("Checking embedded code in %s" % f)

        prelines = pre.splitlines()
        embeddederrors = []
        for name, embedded, filters, pats in embeddedchecks:
            # "reset curmax at each repetition" treats maxerr as "max
            # number of errors in an actual file per entry of
            # (embedded)checks"
            curmaxerr = maxerr

            for found in embedded(f, prelines, embeddederrors):
                # only the start line and the code of a fragment are used
                _filename, starts, _ends, code = found
                fc = _checkfiledata(
                    name,
                    f,
                    code,
                    filters,
                    pats,
                    context,
                    logfunc,
                    curmaxerr,
                    warnings,
                    blame,
                    debug,
                    lineno,
                    offset=starts - 1,
                )
                if fc:
                    result = False
                    if curmaxerr:
                        if fc >= curmaxerr:
                            break
                        curmaxerr -= fc

    return result
938 938
939 939
def _checkfiledata(
    name,
    f,
    filedata,
    filters,
    pats,
    context,
    logfunc,
    maxerr,
    warnings,
    blame,
    debug,
    lineno,
    offset=None,
):
    """Execute actual error check for file data

    The filters are applied to a copy of 'filedata', the patterns are
    matched against that filtered text, and each match is reported with
    the corresponding line of the *unfiltered* text.

    :name: of the checking category
    :f: filepath
    :filedata: content of a file
    :filters: (pattern, replacement) pairs applied, in order, with
              re.sub() before checking
    :pats: pair of pattern lists: pats[0] holds error-level entries and
           pats[1] warning-level ones. Each entry is (compiled regexp,
           message) or (compiled regexp, message, ignore pattern)
    :context: a dict of information shared while single checkfile() invocation
              Valid keys: 'blamecache'. It is filled in here on the first
              use when 'blame' is enabled.
    :logfunc: function used to report error
              logfunc(filename, linenumber, linecontent, errormessage,
              blamedescription)
    :maxerr: number of error to display before aborting, or False to
             report all errors
    :warnings: whether warning level checks should be applied
    :blame: whether blame information should be displayed at error reporting
    :debug: whether debug information should be displayed
    :lineno: whether lineno should be displayed at error reporting
    :offset: line number offset of 'filedata' in 'f' for checking
             an embedded code fragment, or None (offset=0 is different
             from offset=None)

    returns number of detected errors.
    """
    blamecache = context['blamecache']
    lineoffset = 0 if offset is None else offset

    # 'pre' keeps the original text for reporting; 'post' is what the
    # patterns are actually matched against.
    pre = post = filedata
    for p, r in filters:
        post = re.sub(p, r, post)

    nerrs = len(pats[0])  # the first nerrs entries are errors
    if warnings:
        pats = pats[0] + pats[1]
    else:
        pats = pats[0]
    # print post # uncomment to show filtered version

    if debug:
        print("Checking %s for %s" % (name, f))

    prelines = None
    errors = []
    for i, pat in enumerate(pats):
        if len(pat) == 3:
            p, msg, ignore = pat
        else:
            p, msg = pat
            ignore = None
        if i >= nerrs:
            msg = "warning: " + msg

        # finditer() yields matches in increasing position, so the line
        # index 'n' (and the offset 'pos' of its first character) only
        # ever has to move forward for a given pattern.
        pos = 0
        n = 0
        for m in p.finditer(post):
            if prelines is None:
                # split lazily: most files never produce a match
                prelines = pre.splitlines()
                postlines = post.splitlines(True)

            start = m.start()
            while n < len(postlines):
                step = len(postlines[n])
                if pos + step > start:
                    break
                pos += step
                n += 1

            # A match may start at or after the end of the last line
            # (e.g. a zero-width match following the final newline).
            # Report it against an empty line instead of failing with
            # an IndexError.
            if n < len(prelines):
                l = prelines[n]
            else:
                l = ''

            if ignore and re.search(ignore, l, re.MULTILINE):
                if debug:
                    print(
                        "Skipping %s for %s:%s (ignore pattern)"
                        % (name, f, (n + lineoffset))
                    )
                continue

            bd = ""
            if blame:
                bd = 'working directory'
                if blamecache is None:
                    blamecache = getblame(f)
                    context['blamecache'] = blamecache
                if (n + lineoffset) < len(blamecache):
                    bl, bu, br = blamecache[(n + lineoffset)]
                    if offset is None and bl == l:
                        bd = '%s@%s' % (bu, br)
                    elif offset is not None and bl.endswith(l):
                        # "offset is not None" means "checking
                        # embedded code fragment". In this case,
                        # "l" does not have information about the
                        # beginning of an *original* line in the
                        # file (e.g. '  > ').
                        # Therefore, use "str.endswith()", and
                        # show "maybe" for a little loose
                        # examination.
                        bd = '%s@%s, maybe' % (bu, br)

            errors.append((f, lineno and (n + lineoffset + 1), l, msg, bd))

    # Report in (file, line number, ...) order rather than pattern order.
    errors.sort()
    fc = 0
    for e in errors:
        logfunc(*e)
        fc += 1
        if maxerr and fc >= maxerr:
            print(" (too many errors, giving up)")
            break

    return fc
1066 1066
1067 1067
def main():
    """Command line entry point

    Parses the options, determines the list of files to check (the
    arguments, the lines read from stdin when the only argument is '-',
    or everything matched by '*' in the current directory when no
    argument is given) and runs checkfile() on each of them.

    returns 0 if checkfile() reported success for every file, 1 otherwise.
    """
    optparser = optparse.OptionParser("%prog [options] [files | -]")
    addopt = optparser.add_option
    addopt(
        "-w",
        "--warnings",
        action="store_true",
        help="include warning-level checks",
    )
    addopt("-p", "--per-file", type="int", help="max warnings per file")
    addopt(
        "-b",
        "--blame",
        action="store_true",
        help="use annotate to generate blame info",
    )
    addopt("", "--debug", action="store_true", help="show debug information")
    addopt(
        "",
        "--nolineno",
        action="store_false",
        dest='lineno',
        help="don't show line numbers",
    )
    optparser.set_defaults(
        per_file=15, warnings=False, blame=False, debug=False, lineno=True
    )
    opts, args = optparser.parse_args()

    if args == ['-']:
        # read file list from stdin
        targets = sys.stdin.read().splitlines()
    elif args:
        targets = args
    else:
        targets = glob.glob("*")

    _preparepats()

    # Build the full list (no short-circuiting) so that every file is
    # checked and reported even after an earlier one has failed.
    outcomes = [
        checkfile(
            path,
            maxerr=opts.per_file,
            warnings=opts.warnings,
            blame=opts.blame,
            debug=opts.debug,
            lineno=opts.lineno,
        )
        for path in targets
    ]
    return 0 if all(outcomes) else 1
1123 1123
1124 1124
if __name__ == "__main__":
    # Run only when executed as a script; main()'s return value becomes
    # the process exit status.
    raise SystemExit(main())
General Comments 0
You need to be logged in to leave comments. Login now