##// END OF EJS Templates
check-code: add a rule to catch os.path.abspath...
marmoute -
r48435:752109dc default
parent child Browse files
Show More
@@ -1,1108 +1,1131 b''
1 1 #!/usr/bin/env python3
2 2 #
3 3 # check-code - a style and portability checker for Mercurial
4 4 #
5 5 # Copyright 2010 Olivia Mackall <olivia@selenic.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """style and portability checker for Mercurial
11 11
12 12 when a rule triggers wrong, do one of the following (prefer one from top):
13 13 * do the work-around the rule suggests
14 14 * doublecheck that it is a false match
15 15 * improve the rule pattern
16 16 * add an ignore pattern to the rule (3rd arg) which matches your good line
17 17 (you can append a short comment and match this, like: #re-raises)
18 18 * change the pattern to a warning and list the exception in test-check-code-hg
19 19 * ONLY use no--check-code for skipping entire files from external sources
20 20 """
21 21
22 22 from __future__ import absolute_import, print_function
23 23 import glob
24 24 import keyword
25 25 import optparse
26 26 import os
27 27 import re
28 28 import sys
29 29
# Select the text-mode opener once, at import time, from the interpreter's
# major version.
if sys.version_info[0] >= 3:

    def opentext(f):
        """Open ``f`` for reading as text decoded with the latin1 codec."""
        return open(f, encoding='latin1')


else:
    # Python 2: the builtin open() is used unchanged.
    opentext = open
36 36
37 37
# When the name xrange does not exist (NameError), alias it to range so the
# rest of the file can call xrange() unconditionally.
try:
    xrange
except NameError:
    xrange = range
# re2 is optional: if it cannot be imported the name is bound to None, which
# compilere() tests before trying to use it.
try:
    import re2
except ImportError:
    re2 = None
46 46
47 47 import testparseutil
48 48
49 49
def compilere(pat, multiline=False):
    """Compile ``pat``, preferring re2 when that module was imported.

    When ``multiline`` is true the inline ``(?m)`` flag is prepended to the
    pattern.  If re2 is unavailable, or rejects the pattern by raising
    ``re2.error``, the pattern is compiled with the standard ``re`` module.
    """
    source = '(?m)' + pat if multiline else pat
    if not re2:
        return re.compile(source)
    try:
        return re2.compile(source)
    except re2.error:
        # fall back to the standard library engine
        return re.compile(source)
59 59
60 60
61 61 # check "rules depending on implementation of repquote()" in each
62 62 # patterns (especially pypats), before changing around repquote()
# characters that always map to one specific placeholder
_repquotefixedmap = {
    ' ': ' ',
    '\n': '\n',
    '.': 'p',
    ':': 'q',
    '%': '%',
    '\\': 'b',
    '*': 'A',
    '+': 'P',
    '-': 'M',
}


def _repquoteencodechr(i):
    """Return the one-character placeholder for character code ``i``.

    Codes above 255 give 'u'; characters listed in ``_repquotefixedmap`` give
    their fixed replacement; other alphabetic characters give 'x', digits
    give 'n' and everything else gives 'o'.
    """
    if i > 255:
        return 'u'
    char = chr(i)
    fixed = _repquotefixedmap.get(char)
    if fixed is not None:
        return fixed
    if char.isalpha():
        return 'x'
    return 'n' if char.isdigit() else 'o'


# translation table with one placeholder per character code 0..255
_repquotett = ''.join(map(_repquoteencodechr, range(256)))


def repquote(m):
    """Mask the text of a quoted string, keeping its quote characters.

    ``m`` is a match object with named groups 'quote' and 'text'; the text is
    run through ``_repquotett`` and re-wrapped in the original quote.
    """
    masked = m.group('text').translate(_repquotett)
    quote = m.group('quote')
    return quote + masked + quote
96 96
97 97
def reppython(m):
    """Replacement callback for Python comments and quoted strings.

    If the 'comment' group matched, every character up to the end of its
    right-stripped text becomes '#', and trailing whitespace is kept as is.
    Otherwise the match is handed to repquote().
    """
    comment = m.group('comment')
    if not comment:
        return repquote(m)
    width = len(comment.rstrip())
    return "#" * width + comment[width:]
104 104
105 105
def repcomment(m):
    """Return group 1 followed by one '#' per character of group 2."""
    lead, comment = m.group(1, 2)
    return lead + "#" * len(comment)
108 108
109 109
def repccomment(m):
    """Mask the body of a C comment.

    In group 2, every non-whitespace character, and a space that directly
    follows a newline, is replaced by 'x'.  The result is group 1, the masked
    body, then a closing "*/".
    """
    opener, body = m.group(1, 2)
    masked = re.sub(r"((?<=\n) )|\S", "x", body)
    return "".join((opener, masked, "*/"))
113 113
114 114
def repcallspaces(m):
    """Return group 1 plus group 2 with whitespace after each newline removed."""
    head, rest = m.group(1, 2)
    return head + re.sub(r"\n\s+", "\n", rest)
118 118
119 119
def repinclude(m):
    """Return group 1 of the match followed by the literal "<foo>"."""
    prefix = m.group(1)
    return "%s<foo>" % prefix
122 122
123 123
def rephere(m):
    """Return group 1 plus group 2 with each non-whitespace char set to 'x'."""
    tag, body = m.group(1, 2)
    return tag + re.sub(r"\S", "x", body)
127 127
128 128
# Rules for plain shell test scripts (used by the 'test script' entry of
# ``checks``).  The outer list has two sub-lists: index 0 holds errors and
# index 1 holds warnings.  Each rule is a tuple (regex, message) with an
# optional third element, an "ignore" regex.
testpats = [
    [
        (r'\b(push|pop)d\b', "don't use 'pushd' or 'popd', use 'cd'"),
        (r'\W\$?\(\([^\)\n]*\)\)', "don't use (()) or $(()), use 'expr'"),
        (r'grep.*-q', "don't use 'grep -q', redirect to /dev/null"),
        (r'(?<!hg )grep.* -a', "don't use 'grep -a', use in-line python"),
        (r'sed.*-i', "don't use 'sed -i', use a temporary file"),
        (r'\becho\b.*\\n', "don't use 'echo \\n', use printf"),
        (r'echo -n', "don't use 'echo -n', use printf"),
        (r'(^|\|\s*)\bwc\b[^|]*$\n(?!.*\(re\))', "filter wc output"),
        (r'head -c', "don't use 'head -c', use 'dd'"),
        (r'tail -n', "don't use the '-n' option to tail, just use '-<num>'"),
        (r'sha1sum', "don't use sha1sum, use $TESTDIR/md5sum.py"),
        (r'\bls\b.*-\w*R', "don't use 'ls -R', use 'find'"),
        (r'printf.*[^\\]\\([1-9]|0\d)', r"don't use 'printf \NNN', use Python"),
        (r'printf.*[^\\]\\x', "don't use printf \\x, use Python"),
        (r'rm -rf \*', "don't use naked rm -rf, target a directory"),
        (
            r'\[[^\]]+==',
            '[ foo == bar ] is a bashism, use [ foo = bar ] instead',
        ),
        (
            r'(^|\|\s*)grep (-\w\s+)*[^|]*[(|]\w',
            "use egrep for extended grep syntax",
        ),
        (r'(^|\|\s*)e?grep .*\\S', "don't use \\S in regular expression"),
        (r'(?<!!)/bin/', "don't use explicit paths for tools"),
        (r'#!.*/bash', "don't use bash in shebang, use sh"),
        (r'[^\n]\Z', "no trailing newline"),
        (r'export .*=', "don't export and assign at once"),
        (r'^source\b', "don't use 'source', use '.'"),
        (r'touch -d', "don't use 'touch -d', use 'touch -t' instead"),
        (r'\bls +[^|\n-]+ +-', "options to 'ls' must come before filenames"),
        (r'[^>\n]>\s*\$HGRCPATH', "don't overwrite $HGRCPATH, append to it"),
        (r'^stop\(\)', "don't use 'stop' as a shell function name"),
        (r'(\[|\btest\b).*-e ', "don't use 'test -e', use 'test -f'"),
        (r'\[\[\s+[^\]]*\]\]', "don't use '[[ ]]', use '[ ]'"),
        (r'^alias\b.*=', "don't use alias, use a function"),
        (r'if\s*!', "don't use '!' to negate exit status"),
        (r'/dev/u?random', "don't use entropy, use /dev/zero"),
        (r'do\s*true;\s*done', "don't use true as loop body, use sleep 0"),
        (
            r'sed (-e )?\'(\d+|/[^/]*/)i(?!\\\n)',
            "put a backslash-escaped newline after sed 'i' command",
        ),
        (r'^diff *-\w*[uU].*$\n(^ \$ |^$)', "prefix diff -u/-U with cmp"),
        (r'^\s+(if)? diff *-\w*[uU]', "prefix diff -u/-U with cmp"),
        (r'[\s="`\']python\s(?!bindings)', "don't use 'python', use '$PYTHON'"),
        (r'seq ', "don't use 'seq', use $TESTDIR/seq.py"),
        (r'\butil\.Abort\b', "directly use error.Abort"),
        (r'\|&', "don't use |&, use 2>&1"),
        (r'\w = +\w', "only one space after = allowed"),
        (
            r'\bsed\b.*[^\\]\\n',
            "don't use 'sed ... \\n', use a \\ and a newline",
        ),
        (r'env.*-u', "don't use 'env -u VAR', use 'unset VAR'"),
        (r'cp.* -r ', "don't use 'cp -r', use 'cp -R'"),
        (r'grep.* -[ABC]', "don't use grep's context flags"),
        (
            r'find.*-printf',
            "don't use 'find -printf', it doesn't exist on BSD find(1)",
        ),
        (r'\$RANDOM ', "don't use bash-only $RANDOM to generate random values"),
    ],
    # warnings
    [
        (r'^function', "don't use 'function', use old style"),
        (r'^diff.*-\w*N', "don't use 'diff -N'"),
        (r'\$PWD|\${PWD}', "don't use $PWD, use `pwd`"),
        (r'^([^"\'\n]|("[^"\n]*")|(\'[^\'\n]*\'))*\^', "^ must be quoted"),
        (r'kill (`|\$\()', "don't use kill, use killdaemons.py"),
    ],
]
203 203
# Filters for plain test scripts: (regex, replacement callback) pairs.
# Comments are masked by repcomment and here-document bodies by rephere.
testfilters = [
    (r"( *)(#([^!][^\n]*\S)?)", repcomment),
    (r"<<(\S+)((.|\n)*?\n\1)", rephere),
]
208 208
# Regex prefix shared by several of the unified-test rules below.
uprefix = r"^ \$ "
# Rules for unified tests (used by the 'unified test' entry of ``checks``).
# Same layout as testpats: [errors, warnings], each rule being
# (regex, message[, ignore regex]).  Converted copies of the testpats rules
# are appended to these lists further down in the file.
utestpats = [
    [
        (r'^(\S.*|| [$>] \S.*)[ \t]\n', "trailing whitespace on non-output"),
        (
            uprefix + r'.*\|\s*sed[^|>\n]*\n',
            "use regex test output patterns instead of sed",
        ),
        (uprefix + r'(true|exit 0)', "explicit zero exit unnecessary"),
        (
            uprefix + r'.*\|\| echo.*(fail|error)',
            "explicit exit code checks unnecessary",
        ),
        (uprefix + r'set -e', "don't use set -e"),
        (uprefix + r'(\s|fi\b|done\b)', "use > for continued lines"),
        (
            uprefix + r'.*:\.\S*/',
            "x:.y in a path does not work on msys, rewrite "
            "as x://.y, or see `hg log -k msys` for alternatives",
            r'-\S+:\.|' '# no-msys',  # -Rxxx
        ),  # in test-pull.t which is skipped on windows
        (
            r'^ [^$>].*27\.0\.0\.1',
            'use $LOCALIP not an explicit loopback address',
        ),
        (
            r'^ (?![>$] ).*\$LOCALIP.*[^)]$',
            'mark $LOCALIP output lines with (glob) to help tests in BSD jails',
        ),
        (
            r'^ (cat|find): .*: \$ENOENT\$',
            'use test -f to test for file existence',
        ),
        (
            r'^ diff -[^ -]*p',
            "don't use (external) diff with -p for portability",
        ),
        (r' readlink ', 'use readlink.py instead of readlink'),
        (
            r'^ [-+][-+][-+] .* [-+]0000 \(glob\)',
            "glob timezone field in diff output for portability",
        ),
        (
            r'^ @@ -[0-9]+ [+][0-9]+,[0-9]+ @@',
            "use '@@ -N* +N,n @@ (glob)' style chunk header for portability",
        ),
        (
            r'^ @@ -[0-9]+,[0-9]+ [+][0-9]+ @@',
            "use '@@ -N,n +N* @@ (glob)' style chunk header for portability",
        ),
        (
            r'^ @@ -[0-9]+ [+][0-9]+ @@',
            "use '@@ -N* +N* @@ (glob)' style chunk header for portability",
        ),
        (
            uprefix + r'hg( +-[^ ]+( +[^ ]+)?)* +extdiff'
            r'( +(-[^ po-]+|--(?!program|option)[^ ]+|[^-][^ ]*))*$',
            "use $RUNTESTDIR/pdiff via extdiff (or -o/-p for false-positives)",
        ),
    ],
    # warnings
    [
        (
            r'^ (?!.*\$LOCALIP)[^*?/\n]* \(glob\)$',
            "glob match with no glob string (?, *, /, and $LOCALIP)",
        ),
    ],
]
277 277
# Derive unified-test rules from the plain test rules: errors (index 0) and
# warnings (index 1) are each appended to the matching utestpats list.
for i in (0, 1):
    for tp in testpats[i]:
        p, m = tp[0], tp[1]
        # an anchored rule stays anchored right behind the line prefix; any
        # other rule may match anywhere after it
        p = ("^ [$>] (%s)" % p[1:]) if p.startswith('^') else ("^ [$>] .*(%s)" % p)
        # keep whatever follows the message (e.g. an ignore pattern)
        utestpats[i].append((p, m) + tp[2:])

# The tab rules are added only after the loop so that they are not
# transformed: " > \t" and " \t" should be allowed in unified tests
testpats[0].append((r'^( *)\t', "don't use tabs to indent"))
utestpats[0].append((r'^( ?)\t', "don't use tabs to indent"))
293 293
# Filters for unified tests: (regex, replacement callback) pairs, the
# counterpart of testfilters.
utestfilters = [
    (r"<<(\S+)((.|\n)*?\n > \1)", rephere),
    (r"( +)(#([^!][^\n]*\S)?)", repcomment),
]
298 298
# common patterns to check *.py
#
# Layout: [errors, warnings]; each rule is (regex, message[, ignore regex]).
# The error list is reused by both pypats and embeddedpypats below.
commonpypats = [
    [
        (r'\\$', 'Use () to wrap long lines in Python, not \\'),
        (
            r'^\s*def\s*\w+\s*\(.*,\s*\(',
            "tuple parameter unpacking not available in Python 3+",
        ),
        (
            r'lambda\s*\(.*,.*\)',
            "tuple parameter unpacking not available in Python 3+",
        ),
        (r'(?<!def)\s+(cmp)\(', "cmp is not available in Python 3+"),
        (r'(?<!\.)\breduce\s*\(.*', "reduce is not available in Python 3+"),
        (
            r'\bdict\(.*=',
            'dict() is different in Py2 and 3 and is slower than {}',
            'dict-from-generator',
        ),
        (r'\.has_key\b', "dict.has_key is not available in Python 3+"),
        (r'\s<>\s', '<> operator is not available in Python 3+, use !='),
        (r'^\s*\t', "don't use tabs"),
        (r'\S;\s*\n', "semicolon"),
        (r'[^_]_\([ \t\n]*(?:"[^"]+"[ \t\n+]*)+%', "don't use % inside _()"),
        (r"[^_]_\([ \t\n]*(?:'[^']+'[ \t\n+]*)+%", "don't use % inside _()"),
        (r'(\w|\)),\w', "missing whitespace after ,"),
        (r'(\w|\))[+/*\-<>]\w', "missing whitespace in expression"),
        (r'\w\s=\s\s+\w', "gratuitous whitespace after ="),
        (
            (
                # a line ending with a colon, potentially with trailing comments
                r':([ \t]*#[^\n]*)?\n'
                # one that is not a pass and not only a comment
                r'(?P<indent>[ \t]+)[^#][^\n]+\n'
                # more lines at the same indent level
                r'((?P=indent)[^\n]+\n)*'
                # a pass at the same indent level, which is bogus
                r'(?P=indent)pass[ \t\n#]'
            ),
            'omit superfluous pass',
        ),
        (r'[^\n]\Z', "no trailing newline"),
        (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
        (
            r'^\s+(self\.)?[A-Za-z][a-z0-9]+[A-Z]\w* = ',
            "don't use camelcase in identifiers",
            r'#.*camelcase-required',
        ),
        (
            r'^\s*(if|while|def|class|except|try)\s[^[\n]*:\s*[^\\n]#\s]+',
            "linebreak after :",
        ),
        (
            r'class\s[^( \n]+:',
            "old-style class, use class foo(object)",
            r'#.*old-style',
        ),
        (
            r'class\s[^( \n]+\(\):',
            "class foo() creates old style object, use class foo(object)",
            r'#.*old-style',
        ),
        (
            # alternation of every Python keyword except print and exec
            r'\b(%s)\('
            % '|'.join(k for k in keyword.kwlist if k not in ('print', 'exec')),
            "Python keyword is not a function",
        ),
        #        (r'class\s[A-Z][^\(]*\((?!Exception)',
        #         "don't capitalize non-exception classes"),
        #        (r'in range\(', "use xrange"),
        #        (r'^\s*print\s+', "avoid using print in core and extensions"),
        (r'[\x80-\xff]', "non-ASCII character literal"),
        (r'("\')\.format\(', "str.format() has no bytes counterpart, use %"),
        (
            r'([\(\[][ \t]\S)|(\S[ \t][\)\]])',
            "gratuitous whitespace in () or []",
        ),
        #        (r'\s\s=', "gratuitous whitespace before ="),
        (
            r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\S',
            "missing whitespace around operator",
        ),
        (
            r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\s',
            "missing whitespace around operator",
        ),
        (
            r'\s(\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\S',
            "missing whitespace around operator",
        ),
        (r'[^^+=*/!<>&| %-](\s=|=\s)[^= ]', "wrong whitespace around ="),
        (
            r'\([^()]*( =[^=]|[^<>!=]= )',
            "no whitespace around = for named parameters",
        ),
        (
            r'raise [^,(]+, (\([^\)]+\)|[^,\(\)]+)$',
            "don't use old-style two-argument raise, use Exception(message)",
        ),
        (r' is\s+(not\s+)?["\'0-9-]', "object comparison with literal"),
        (
            r' [=!]=\s+(True|False|None)',
            "comparison with singleton, use 'is' or 'is not' instead",
        ),
        (
            r'^\s*(while|if) [01]:',
            "use True/False for constant Boolean expression",
        ),
        (r'^\s*if False(:| +and)', 'Remove code instead of using `if False`'),
        (
            r'(?:(?<!def)\s+|\()hasattr\(',
            'hasattr(foo, bar) is broken on py2, use util.safehasattr(foo, bar) '
            'instead',
            r'#.*hasattr-py3-only',
        ),
        (r'opener\([^)]*\).read\(', "use opener.read() instead"),
        (r'opener\([^)]*\).write\(', "use opener.write() instead"),
        (r'(?i)descend[e]nt', "the proper spelling is descendAnt"),
        (r'\.debug\(\_', "don't mark debug messages for translation"),
        (r'\.strip\(\)\.split\(\)', "no need to strip before splitting"),
        (r'^\s*except\s*:', "naked except clause", r'#.*re-raises'),
        (
            r'^\s*except\s([^\(,]+|\([^\)]+\))\s*,',
            'legacy exception syntax; use "as" instead of ","',
        ),
        (r'release\(.*wlock, .*lock\)', "wrong lock release order"),
        (r'\bdef\s+__bool__\b', "__bool__ should be __nonzero__ in Python 2"),
        (
            r'os\.path\.join\(.*, *(""|\'\')\)',
            "use pathutil.normasprefix(path) instead of os.path.join(path, '')",
        ),
        (r'\s0[0-7]+\b', 'legacy octal syntax; use "0o" prefix instead of "0"'),
        # XXX only catch mutable arguments on the first line of the definition
        (r'def.*[( ]\w+=\{\}', "don't use mutable default arguments"),
        (r'\butil\.Abort\b', "directly use error.Abort"),
        (
            r'^@(\w*\.)?cachefunc',
            "module-level @cachefunc is risky, please avoid",
        ),
        (
            r'^import Queue',
            "don't use Queue, use pycompat.queue.Queue + "
            "pycompat.queue.Empty",
        ),
        (
            r'^import cStringIO',
            "don't use cStringIO.StringIO, use util.stringio",
        ),
        (r'^import urllib', "don't use urllib, use util.urlreq/util.urlerr"),
        (
            # NOTE(review): "SockerServer" in the message below looks like a
            # typo for "SocketServer" -- confirm before changing the string
            r'^import SocketServer',
            "don't use SockerServer, use util.socketserver",
        ),
        (r'^import urlparse', "don't use urlparse, use util.urlreq"),
        (r'^import xmlrpclib', "don't use xmlrpclib, use util.xmlrpclib"),
        (r'^import cPickle', "don't use cPickle, use util.pickle"),
        (r'^import pickle', "don't use pickle, use util.pickle"),
        (r'^import httplib', "don't use httplib, use util.httplib"),
        (r'^import BaseHTTPServer', "use util.httpserver instead"),
        (
            r'^(from|import) mercurial\.(cext|pure|cffi)',
            "use mercurial.policy.importmod instead",
        ),
        (r'\.next\(\)', "don't use .next(), use next(...)"),
        (
            r'([a-z]*).revision\(\1\.node\(',
            "don't convert rev to node before passing to revision(nodeorrev)",
        ),
        (r'platform\.system\(\)', "don't use platform.system(), use pycompat"),
    ],
    # warnings
    [],
]
472 472
# patterns to check normal *.py files
#
# Layout: [errors, warnings]; the rules listed here are followed by the
# matching commonpypats list.
pypats = [
    [
        # Ideally, these should be placed in "commonpypats" for
        # consistency of coding rules in Mercurial source tree.
        # But on the other hand, these are not so seriously required for
        # python code fragments embedded in test scripts. Fixing test
        # scripts for these patterns requires many changes, and has less
        # profit than effort.
        (r'raise Exception', "don't raise generic exceptions"),
        (r'[\s\(](open|file)\([^)]*\)\.read\(', "use util.readfile() instead"),
        (
            r'[\s\(](open|file)\([^)]*\)\.write\(',
            "use util.writefile() instead",
        ),
        (
            r'^[\s\(]*(open(er)?|file)\([^)]*\)(?!\.close\(\))',
            "always assign an opened file to a variable, and close it afterwards",
        ),
        (
            r'[\s\(](open|file)\([^)]*\)\.(?!close\(\))',
            "always assign an opened file to a variable, and close it afterwards",
        ),
        # NOTE(review): the message says 4 spaces but "( )*" as shown here
        # matches single spaces -- confirm the whitespace inside this pattern
        # was not collapsed in this copy of the file
        (r':\n( )*( ){1,3}[^ ]', "must indent 4 spaces"),
        (r'^import atexit', "don't use atexit, use ui.atexit"),
        # rules depending on implementation of repquote()
        (
            r' x+[xpqo%APM][\'"]\n\s+[\'"]x',
            'string join across lines with no space',
        ),
        (
            r'''(?x)ui\.(status|progress|write|note|warn)\(
[ \t\n#]*
(?# any strings/comments might precede a string, which
# contains translatable message)
b?((['"]|\'\'\'|""")[ \npq%bAPMxno]*(['"]|\'\'\'|""")[ \t\n#]+)*
(?# sequence consisting of below might precede translatable message
# - formatting string: "% 10s", "%05d", "% -3.2f", "%*s", "%%" ...
# - escaped character: "\\", "\n", "\0" ...
# - character other than '%', 'b' as '\', and 'x' as alphabet)
(['"]|\'\'\'|""")
((%([ n]?[PM]?([np]+|A))?x)|%%|b[bnx]|[ \nnpqAPMo])*x
(?# this regexp can't use [^...] style,
# because _preparepats forcibly adds "\n" into [^...],
# even though this regexp wants match it against "\n")''',
            "missing _() in ui message (use () to hide false-positives)",
        ),
    ]
    + commonpypats[0],
    # warnings
    [
        # rules depending on implementation of repquote()
        (r'(^| )pp +xxxxqq[ \n][^\n]', "add two newlines after '.. note::'"),
    ]
    + commonpypats[1],
]
529 529
# patterns to check *.py for embedded ones in test script
#
# "[] + ..." builds new list objects, so these lists are distinct from the
# ones held in commonpypats.
embeddedpypats = [
    [] + commonpypats[0],
    # warnings
    [] + commonpypats[1],
]
536 536
# common filters to convert *.py
#
# A single verbose-mode regex that matches either a '#' comment (named group
# 'comment') or a quoted string (named groups 'quote' and 'text'); each match
# is rewritten by reppython.
commonpyfilters = [
    (
        r"""(?msx)(?P<comment>\#.*?$)|
((?P<quote>('''|\"\"\"|(?<!')'(?!')|(?<!")"(?!")))
(?P<text>(([^\\]|\\.)*?))
(?P=quote))""",
        reppython,
    ),
]
547 547
# pattern only for mercurial and extensions
#
# Layout: [errors, warnings]; each rule is (regex, message[, ignore regex]).
core_py_pats = [
    [
        # Windows tend to get confused about capitalization of the drive letter
        #
        # see mercurial.windows.abspath for details
        (
            r'os\.path\.abspath',
            "use util.abspath instead (windows)",
            r'#.*re-exports',
        ),
    ],
    # warnings
    [],
]
563
# filters to convert normal *.py files
# ("[] + ..." makes this a separate list object from commonpyfilters)
pyfilters = [] + commonpyfilters

# non-filter patterns
# (the ``checks`` entry using these passes an empty filter list)
pynfpats = [
    [
        (r'pycompat\.osname\s*[=!]=\s*[\'"]nt[\'"]', "use pycompat.iswindows"),
        (r'pycompat\.osname\s*[=!]=\s*[\'"]posix[\'"]', "use pycompat.isposix"),
        (
            r'pycompat\.sysplatform\s*[!=]=\s*[\'"]darwin[\'"]',
            "use pycompat.isdarwin",
        ),
    ],
    # warnings
    [],
]
564 580
# filters to convert *.py for embedded ones in test script
# ("[] + ..." makes this a separate list object from commonpyfilters)
embeddedpyfilters = [] + commonpyfilters

# extension non-filter patterns
# (applied by ``checks`` to paths matching r'.*hgext.*\.py$', unfiltered)
pyextnfpats = [
    [(r'^"""\n?[A-Z]', "don't capitalize docstring title")],
    # warnings
    [],
]
574 590
# No filters are applied to *.txt files.
txtfilters = []

# Rules for *.txt files: [errors, warnings], each rule (regex, message).
txtpats = [
    [
        (r'\s$', 'trailing whitespace'),
        ('.. note::[ \n][^\n]', 'add two newlines after note::'),
    ],
    [],
]
584 600
# Rules for C sources and headers (the 'c' entry of ``checks``):
# [errors, warnings], each rule (regex, message).
cpats = [
    [
        (r'//', "don't use //-style comments"),
        (r'\S\t', "don't use tabs except for indent"),
        (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
        (r'(while|if|do|for)\(', "use space after while/if/do/for"),
        (r'return\(', "return is not a function"),
        (r' ;', "no space before ;"),
        (r'[^;] \)', "no space before )"),
        (r'[)][{]', "space between ) and {"),
        (r'\w+\* \w+', "use int *foo, not int* foo"),
        (r'\W\([^\)]+\) \w+', "use (int)foo, not (int) foo"),
        (r'\w+ (\+\+|--)', "use foo++, not foo ++"),
        (r'\w,\w', "missing whitespace after ,"),
        (r'^[^#]\w[+/*]\w', "missing whitespace in expression"),
        (r'\w\s=\s\s+\w', "gratuitous whitespace after ="),
        (r'^#\s+\w', "use #foo, not # foo"),
        (r'[^\n]\Z', "no trailing newline"),
        (r'^\s*#import\b', "use only #include in standard C code"),
        (r'strcpy\(', "don't use strcpy, use strlcpy or memcpy"),
        (r'strcat\(', "don't use strcat"),
        # rules depending on implementation of repquote()
    ],
    # warnings
    [
        # rules depending on implementation of repquote()
    ],
]
613 629
# Filters for C sources: (regex, replacement callback) pairs covering, in
# order, /* */ comments, double-quoted strings, #include <...> lines and
# parenthesised spans.
cfilters = [
    (r'(/\*)(((\*(?!/))|[^*])*)\*/', repccomment),
    (r'''(?P<quote>(?<!")")(?P<text>([^"]|\\")+)"(?!")''', repquote),
    (r'''(#\s*include\s+<)([^>]+)>''', repinclude),
    (r'(\()([^)]+\))', repcallspaces),
]
620 636
# Layering rule that ``checks`` applies to mercurial/util.py:
# [errors, warnings].
inutilpats = [
    [
        (r'\bui\.', "don't use ui in util"),
    ],
    # warnings
    [],
]

# Layering rule that ``checks`` applies to mercurial/revlog.py:
# [errors, warnings].
inrevlogpats = [
    [
        (r'\brepo\.', "don't use repo in revlog"),
    ],
    # warnings
    [],
]
636 652
# No filters are applied to web templates.
webtemplatefilters = []

# Rules for web templates: the error list (index 0) is empty, the single
# rule lives in the warning list (index 1).
webtemplatepats = [
    [],
    [
        (
            r'{desc(\|(?!websub|firstline)[^\|]*)+}',
            'follow desc keyword with either firstline or websub',
        ),
    ],
]
648 664
# No filters for the 'all except for .po' check.
allfilesfilters = []

# Rules for the 'all except for .po' entry of ``checks``: [errors, warnings].
allfilespats = [
    [
        (
            r'(http|https)://[a-zA-Z0-9./]*selenic.com/',
            'use mercurial-scm.org domain URL',
        ),
        (
            r'mercurial@selenic\.com',
            'use mercurial-scm.org domain for mercurial ML address',
        ),
        (
            r'mercurial-devel@selenic\.com',
            'use mercurial-scm.org domain for mercurial-devel ML address',
        ),
    ],
    # warnings
    [],
]
669 685
# Rules for the 'python 3' entry of ``checks``: [errors, warnings], each
# rule (regex, message[, ignore regex]).
py3pats = [
    [
        (
            r'os\.environ',
            "use encoding.environ instead (py3)",
            r'#.*re-exports',
        ),
        (r'os\.name', "use pycompat.osname instead (py3)"),
        (r'os\.getcwd', "use encoding.getcwd instead (py3)", r'#.*re-exports'),
        (r'os\.sep', "use pycompat.ossep instead (py3)"),
        (r'os\.pathsep', "use pycompat.ospathsep instead (py3)"),
        (r'os\.altsep', "use pycompat.osaltsep instead (py3)"),
        (r'sys\.platform', "use pycompat.sysplatform instead (py3)"),
        (r'getopt\.getopt', "use pycompat.getoptb instead (py3)"),
        (r'os\.getenv', "use encoding.environ.get instead"),
        (r'os\.setenv', "modifying the environ dict is not preferred"),
        (r'(?<!pycompat\.)xrange', "use pycompat.xrange instead (py3)"),
    ],
    # warnings
    [],
]
691 707
# Table of checks.  Each entry is
#   (name, filename regex, content "magic" regex, filters, patterns)
# checkfile() applies an entry when the filename regex matches the path
# (re.match) or, for a non-empty magic regex, when that regex is found in the
# file content (re.search).
checks = [
    ('python', r'.*\.(py|cgi)$', r'^#!.*python', pyfilters, pypats),
    ('python', r'.*\.(py|cgi)$', r'^#!.*python', [], pynfpats),
    ('python', r'.*hgext.*\.py$', '', [], pyextnfpats),
    (
        'python 3',
        r'.*(hgext|mercurial)/(?!demandimport|policy|pycompat).*\.py',
        '',
        pyfilters,
        py3pats,
    ),
    (
        'core files',
        r'.*(hgext|mercurial)/(?!demandimport|policy|pycompat).*\.py',
        '',
        pyfilters,
        core_py_pats,
    ),
    ('test script', r'(.*/)?test-[^.~]*$', '', testfilters, testpats),
    ('c', r'.*\.[ch]$', '', cfilters, cpats),
    ('unified test', r'.*\.t$', '', utestfilters, utestpats),
    (
        'layering violation repo in revlog',
        r'mercurial/revlog\.py',
        '',
        pyfilters,
        inrevlogpats,
    ),
    (
        'layering violation ui in util',
        r'mercurial/util\.py',
        '',
        pyfilters,
        inutilpats,
    ),
    ('txt', r'.*\.txt$', '', txtfilters, txtpats),
    (
        'web template',
        r'mercurial/templates/.*\.tmpl',
        '',
        webtemplatefilters,
        webtemplatepats,
    ),
    ('all except for .po', r'.*(?<!\.po)$', '', allfilesfilters, allfilespats),
]
730 753
# (desc,
#  func to pick up embedded code fragments,
#  list of patterns to convert target files
#  list of patterns to detect errors/warnings)
#
# checkfile() consults this table only for paths ending in '.t'.
embeddedchecks = [
    (
        'embedded python',
        testparseutil.pyembedded,
        embeddedpyfilters,
        embeddedpypats,
    )
]
743 766
744 767
def _preparepats():
    """Compile, in place, every regex held by ``checks``/``embeddedchecks``.

    For each entry the last element (the [errors, warnings] rule lists) has
    the first item of every rule rewritten and compiled with re.MULTILINE,
    and the second-to-last element (the filter list) has the first item of
    every filter compiled.
    """

    def fixup(pattern):
        # fix-up regexes for multi-line searches
        # \s doesn't match \n (done in two steps)
        # first, we replace \s that appears in a set already
        pattern = re.sub(r'\[\\s', r'[ \\t', pattern)
        # now we replace other \s instances.
        pattern = re.sub(r'(?<!(\\|\[))\\s', r'[ \\t]', pattern)
        # [^...] doesn't match newline
        return re.sub(r'(?<!\\)\[\^', r'[^\\n', pattern)

    def preparefailandwarn(failandwarn):
        for rules in failandwarn:
            for idx, rule in enumerate(rules):
                compiled = re.compile(fixup(rule[0]), re.MULTILINE)
                rules[idx] = (compiled,) + rule[1:]

    def preparefilters(filters):
        for idx, flt in enumerate(filters):
            filters[idx] = re.compile(flt[0]), flt[1]

    for table in (checks, embeddedchecks):
        for entry in table:
            preparefailandwarn(entry[-1])
            preparefilters(entry[-2])
772 795
773 796
class norepeatlogger(object):
    """Error printer that shows a faulty line only once per location."""

    def __init__(self):
        # (fname, lineno, line) of the most recently printed location
        self._lastseen = None

    def log(self, fname, lineno, line, msg, blame):
        """print an error related to a given line of a given file.

        The location header and the faulty line itself are printed only when
        they differ from the previous call; the message is always printed.

        :fname: filename
        :lineno: line number
        :line: actual content of the line
        :msg: error message
        :blame: when true, it is appended to the location header
        """
        location = (fname, lineno, line)
        if location != self._lastseen:
            if blame:
                header = "%s:%d (%s):" % (fname, lineno, blame)
            else:
                header = "%s:%d:" % (fname, lineno)
            print(header)
            print(" > %s" % line)
            self._lastseen = location
        print(" " + msg)


_defaultlogger = norepeatlogger()
801 824
802 825
def getblame(f):
    """Return annotate information for every line of file ``f``.

    Runs ``hg annotate -un`` on ``f`` and returns a list with one
    ``(line, user, rev)`` tuple per line of output.
    """
    lines = []
    # NOTE(review): ``f`` is interpolated into a shell command unquoted, so
    # paths containing spaces or shell metacharacters will not work.
    # The pipe is used as a context manager so that it is always closed,
    # even when parsing a line raises.
    with os.popen('hg annotate -un %s' % f) as pipe:
        for l in pipe:
            start, line = l.split(':', 1)
            user, rev = start.split()
            # drop the first character (presumably the blank after ':') and
            # the last one (presumably the newline)
            lines.append((line[1:-1], user, rev))
    return lines
810 833
811 834
def checkfile(
    f,
    logfunc=_defaultlogger.log,
    maxerr=None,
    warnings=False,
    blame=False,
    debug=False,
    lineno=True,
):
    """checks style and portability of a given file

    :f: filepath
    :logfunc: function used to report error
              logfunc(filename, linenumber, linecontent, errormessage)
    :maxerr: number of error to display before aborting.
             Leave it at a false value (default: None) to report all errors
    :warnings: whether warning level checks should be applied
    :blame: whether blame information should be displayed at error reporting
    :debug: whether debug information should be displayed
    :lineno: whether lineno should be displayed at error reporting

    return True if no error is found, False otherwise.  True is also
    returned when the file cannot be read or decoded, and the string "Skip"
    is returned when a matching file opts out of checking with its
    no-check-code marker.
    """
    result = True

    try:
        with opentext(f) as fp:
            try:
                pre = fp.read()
            except UnicodeDecodeError as e:
                print("%s while reading %s" % (e, f))
                return result
    except IOError as e:
        print("Skipping %s, %s" % (f, str(e).split(':', 1)[0]))
        return result

    # context information shared during a single checkfile() invocation
    context = {'blamecache': None}

    for name, match, magic, filters, pats in checks:
        if debug:
            print(name, f)
        if not (re.match(match, f) or (magic and re.search(magic, pre))):
            if debug:
                # report the file first, then the pattern it failed to match
                print(
                    "Skipping %s for %s it doesn't match %s" % (name, f, match)
                )
            continue
        if "no-" "check-code" in pre:
            # If you're looking at this line, it's because a file has:
            # no- check- code
            # but the reason to output skipping is to make life for
            # tests easier. So, instead of writing it with a normal
            # spelling, we write it with the expected spelling from
            # tests/test-check-code.t
            print("Skipping %s it has no-che?k-code (glob)" % f)
            return "Skip"  # skip checking this file

        fc = _checkfiledata(
            name,
            f,
            pre,
            filters,
            pats,
            context,
            logfunc,
            maxerr,
            warnings,
            blame,
            debug,
            lineno,
        )
        if fc:
            result = False

    if f.endswith('.t') and "no-" "check-code" not in pre:
        if debug:
            print("Checking embedded code in %s" % f)

        prelines = pre.splitlines()
        embeddederros = []
        for name, embedded, filters, pats in embeddedchecks:
            # "reset curmax at each repetition" treats maxerr as "max
            # number of errors in an actual file per entry of
            # (embedded)checks"
            curmaxerr = maxerr

            for found in embedded(f, prelines, embeddederros):
                filename, starts, ends, code = found
                fc = _checkfiledata(
                    name,
                    f,
                    code,
                    filters,
                    pats,
                    context,
                    logfunc,
                    curmaxerr,
                    warnings,
                    blame,
                    debug,
                    lineno,
                    offset=starts - 1,
                )
                if fc:
                    result = False
                    if curmaxerr:
                        # stop once this entry's error budget is used up,
                        # otherwise carry the remainder to the next fragment
                        if fc >= curmaxerr:
                            break
                        curmaxerr -= fc

    return result
920 943
921 944
922 945 def _checkfiledata(
923 946 name,
924 947 f,
925 948 filedata,
926 949 filters,
927 950 pats,
928 951 context,
929 952 logfunc,
930 953 maxerr,
931 954 warnings,
932 955 blame,
933 956 debug,
934 957 lineno,
935 958 offset=None,
936 959 ):
937 960 """Execute actual error check for file data
938 961
939 962 :name: of the checking category
940 963 :f: filepath
941 964 :filedata: content of a file
942 965 :filters: to be applied before checking
943 966 :pats: to detect errors
944 967 :context: a dict of information shared while single checkfile() invocation
945 968 Valid keys: 'blamecache'.
946 969 :logfunc: function used to report error
947 970 logfunc(filename, linenumber, linecontent, errormessage)
948 971 :maxerr: number of error to display before aborting, or False to
949 972 report all errors
950 973 :warnings: whether warning level checks should be applied
951 974 :blame: whether blame information should be displayed at error reporting
952 975 :debug: whether debug information should be displayed
953 976 :lineno: whether lineno should be displayed at error reporting
954 977 :offset: line number offset of 'filedata' in 'f' for checking
955 978 an embedded code fragment, or None (offset=0 is different
956 979 from offset=None)
957 980
958 981 returns number of detected errors.
959 982 """
960 983 blamecache = context['blamecache']
961 984 if offset is None:
962 985 lineoffset = 0
963 986 else:
964 987 lineoffset = offset
965 988
966 989 fc = 0
967 990 pre = post = filedata
968 991
969 992 if True: # TODO: get rid of this redundant 'if' block
970 993 for p, r in filters:
971 994 post = re.sub(p, r, post)
972 995 nerrs = len(pats[0]) # nerr elements are errors
973 996 if warnings:
974 997 pats = pats[0] + pats[1]
975 998 else:
976 999 pats = pats[0]
977 1000 # print post # uncomment to show filtered version
978 1001
979 1002 if debug:
980 1003 print("Checking %s for %s" % (name, f))
981 1004
982 1005 prelines = None
983 1006 errors = []
984 1007 for i, pat in enumerate(pats):
985 1008 if len(pat) == 3:
986 1009 p, msg, ignore = pat
987 1010 else:
988 1011 p, msg = pat
989 1012 ignore = None
990 1013 if i >= nerrs:
991 1014 msg = "warning: " + msg
992 1015
993 1016 pos = 0
994 1017 n = 0
995 1018 for m in p.finditer(post):
996 1019 if prelines is None:
997 1020 prelines = pre.splitlines()
998 1021 postlines = post.splitlines(True)
999 1022
1000 1023 start = m.start()
1001 1024 while n < len(postlines):
1002 1025 step = len(postlines[n])
1003 1026 if pos + step > start:
1004 1027 break
1005 1028 pos += step
1006 1029 n += 1
1007 1030 l = prelines[n]
1008 1031
1009 1032 if ignore and re.search(ignore, l, re.MULTILINE):
1010 1033 if debug:
1011 1034 print(
1012 1035 "Skipping %s for %s:%s (ignore pattern)"
1013 1036 % (name, f, (n + lineoffset))
1014 1037 )
1015 1038 continue
1016 1039 bd = ""
1017 1040 if blame:
1018 1041 bd = 'working directory'
1019 1042 if blamecache is None:
1020 1043 blamecache = getblame(f)
1021 1044 context['blamecache'] = blamecache
1022 1045 if (n + lineoffset) < len(blamecache):
1023 1046 bl, bu, br = blamecache[(n + lineoffset)]
1024 1047 if offset is None and bl == l:
1025 1048 bd = '%s@%s' % (bu, br)
1026 1049 elif offset is not None and bl.endswith(l):
1027 1050 # "offset is not None" means "checking
1028 1051 # embedded code fragment". In this case,
1029 1052 # "l" does not have information about the
1030 1053 # beginning of an *original* line in the
1031 1054 # file (e.g. ' > ').
1032 1055 # Therefore, use "str.endswith()", and
1033 1056 # show "maybe" for a little loose
1034 1057 # examination.
1035 1058 bd = '%s@%s, maybe' % (bu, br)
1036 1059
1037 1060 errors.append((f, lineno and (n + lineoffset + 1), l, msg, bd))
1038 1061
1039 1062 errors.sort()
1040 1063 for e in errors:
1041 1064 logfunc(*e)
1042 1065 fc += 1
1043 1066 if maxerr and fc >= maxerr:
1044 1067 print(" (too many errors, giving up)")
1045 1068 break
1046 1069
1047 1070 return fc
1048 1071
1049 1072
def main():
    """Command line entry point: check the requested files.

    The files come from the command line arguments, from stdin (one
    path per line) when the only argument is '-', or from glob("*")
    when no argument is given.

    returns 0 if checkfile() reported success for every file, 1 otherwise.
    """
    parser = optparse.OptionParser("%prog [options] [files | -]")

    # (option strings, keyword arguments) for each supported option
    optionspecs = [
        (
            ("-w", "--warnings"),
            dict(action="store_true", help="include warning-level checks"),
        ),
        (
            ("-p", "--per-file"),
            dict(type="int", help="max warnings per file"),
        ),
        (
            ("-b", "--blame"),
            dict(
                action="store_true",
                help="use annotate to generate blame info",
            ),
        ),
        (
            ("", "--debug"),
            dict(action="store_true", help="show debug information"),
        ),
        (
            ("", "--nolineno"),
            dict(
                action="store_false",
                dest='lineno',
                help="don't show line numbers",
            ),
        ),
    ]
    for optstrings, settings in optionspecs:
        parser.add_option(*optstrings, **settings)

    parser.set_defaults(
        per_file=15, warnings=False, blame=False, debug=False, lineno=True
    )
    options, args = parser.parse_args()

    if args == ['-']:
        # read file list from stdin
        check = sys.stdin.read().splitlines()
    elif args:
        check = args
    else:
        check = glob.glob("*")

    _preparepats()

    # every file is checked, even after an earlier one has failed
    status = 0
    for path in check:
        passed = checkfile(
            path,
            maxerr=options.per_file,
            warnings=options.warnings,
            blame=options.blame,
            debug=options.debug,
            lineno=options.lineno,
        )
        if not passed:
            status = 1
    return status
1105 1128
1106 1129
if __name__ == "__main__":
    # hand the status computed by main() (0 or 1) back to the caller
    exitstatus = main()
    sys.exit(exitstatus)
General Comments 0
You need to be logged in to leave comments. Login now