##// END OF EJS Templates
contrib: factor out actual error check for file data of check-code.py...
FUJIWARA Katsunori -
r41989:7a139fc6 default
parent child Browse files
Show More
@@ -1,774 +1,811 b''
1 1 #!/usr/bin/env python
2 2 #
3 3 # check-code - a style and portability checker for Mercurial
4 4 #
5 5 # Copyright 2010 Matt Mackall <mpm@selenic.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """style and portability checker for Mercurial
11 11
12 12 when a rule triggers wrong, do one of the following (prefer one from top):
13 13 * do the work-around the rule suggests
14 14 * doublecheck that it is a false match
15 15 * improve the rule pattern
16 16 * add an ignore pattern to the rule (3rd arg) which matches your good line
17 17 (you can append a short comment and match this, like: #re-raises)
18 18 * change the pattern to a warning and list the exception in test-check-code-hg
19 19 * ONLY use no--check-code for skipping entire files from external sources
20 20 """
21 21
22 22 from __future__ import absolute_import, print_function
23 23 import glob
24 24 import keyword
25 25 import optparse
26 26 import os
27 27 import re
28 28 import sys
# Python 2/3 compatibility shims.
if sys.version_info[0] < 3:
    # on Python 2 the builtin open() is used as-is
    opentext = open
else:
    def opentext(f):
        """Open the file ``f`` for reading as text decoded as latin1."""
        return open(f, encoding='latin1')
try:
    xrange
except NameError:
    # no xrange builtin available (Python 3): fall back to range
    xrange = range
try:
    import re2
except ImportError:
    # re2 is optional; compilere() falls back to the standard re module
    re2 = None

def compilere(pat, multiline=False):
    """Compile the regular expression ``pat`` and return the pattern object

    :pat: regular expression source
    :multiline: if true, '(?m)' is prepended to the pattern so that it
                is compiled in multi-line mode

    re2 is tried first when it could be imported. If re2 is not
    available, or if it rejects the pattern with re2.error, the pattern
    is compiled with the standard re module instead.
    """
    if multiline:
        pat = '(?m)' + pat
    if re2:
        try:
            return re2.compile(pat)
        except re2.error:
            # pattern not accepted by re2: fall through to re
            pass
    return re.compile(pat)
52 52
# check "rules depending on implementation of repquote()" in each
# patterns (especially pypats), before changing around repquote()
_repquotefixedmap = {' ': ' ', '\n': '\n', '.': 'p', ':': 'q',
                     '%': '%', '\\': 'b', '*': 'A', '+': 'P', '-': 'M'}
def _repquoteencodechr(i):
    """Return the one-character placeholder for the character code ``i``

    Characters listed in _repquotefixedmap use their fixed replacement.
    Any other alphabetic character becomes 'x', any other digit becomes
    'n', and everything else becomes 'o'. Codes above 255 become 'u'.
    """
    if i > 255:
        return 'u'
    c = chr(i)
    if c in _repquotefixedmap:
        return _repquotefixedmap[c]
    if c.isalpha():
        return 'x'
    if c.isdigit():
        return 'n'
    return 'o'
# translation table indexed by character code (0..255); range() is used
# so that building this small table needs no Python 2/3 shim
_repquotett = ''.join(_repquoteencodechr(i) for i in range(256))

def repquote(m):
    """Replacement function masking the content of a quoted string

    ``m`` is a match object with the named groups 'quote' and 'text'.
    The text is translated character by character through _repquotett
    and returned wrapped in the original quote, so the length of the
    string is kept while its content is replaced by placeholders.
    """
    t = m.group('text')
    t = t.translate(_repquotett)
    return m.group('quote') + t + m.group('quote')
74 74
def reppython(m):
    """Replacement function masking Python comments and string literals

    ``m`` is a match object with a 'comment' named group. If that group
    matched, the comment is replaced by '#' characters of the same
    length, with its trailing whitespace kept as-is. Otherwise the match
    is handed to repquote().
    """
    comment = m.group('comment')
    if comment:
        # length of the comment without trailing whitespace
        l = len(comment.rstrip())
        return "#" * l + comment[l:]
    return repquote(m)
81 81
def repcomment(m):
    """Replacement function masking a comment

    Returns group 1 of ``m`` unchanged, followed by as many '#'
    characters as group 2 is long.
    """
    return m.group(1) + "#" * len(m.group(2))
84 84
def repccomment(m):
    """Replacement function masking the body of a C comment

    Group 2 of ``m`` is the comment body: every non-whitespace
    character, and a blank directly following a newline, is replaced by
    'x'. The result is returned between group 1 and a closing "*/".
    """
    t = re.sub(r"((?<=\n) )|\S", "x", m.group(2))
    return m.group(1) + t + "*/"
88 88
def repcallspaces(m):
    """Replacement function dropping indentation of continued lines

    In group 2 of ``m``, each newline followed by whitespace is reduced
    to the bare newline. The result is returned after group 1.
    """
    t = re.sub(r"\n\s+", "\n", m.group(2))
    return m.group(1) + t
92 92
def repinclude(m):
    """Replacement function masking the target of an include

    Returns group 1 of ``m`` followed by the fixed text "<foo>".

    NOTE(review): if group 1 already ends with '<' (as the include
    pattern in cfilters appears to arrange), the result contains
    "<<foo>". The filtered text is only used for matching, so this is
    kept as-is.
    """
    return m.group(1) + "<foo>"
95 95
def rephere(m):
    """Replacement function masking the body of a here-document

    Every non-whitespace character in group 2 of ``m`` is replaced by
    'x', so line structure and blanks are kept. The result is returned
    after group 1.
    """
    t = re.sub(r"\S", "x", m.group(2))
    return m.group(1) + t
99 99
100 100
101 101 testpats = [
102 102 [
103 103 (r'\b(push|pop)d\b', "don't use 'pushd' or 'popd', use 'cd'"),
104 104 (r'\W\$?\(\([^\)\n]*\)\)', "don't use (()) or $(()), use 'expr'"),
105 105 (r'grep.*-q', "don't use 'grep -q', redirect to /dev/null"),
106 106 (r'(?<!hg )grep.* -a', "don't use 'grep -a', use in-line python"),
107 107 (r'sed.*-i', "don't use 'sed -i', use a temporary file"),
108 108 (r'\becho\b.*\\n', "don't use 'echo \\n', use printf"),
109 109 (r'echo -n', "don't use 'echo -n', use printf"),
110 110 (r'(^|\|\s*)\bwc\b[^|]*$\n(?!.*\(re\))', "filter wc output"),
111 111 (r'head -c', "don't use 'head -c', use 'dd'"),
112 112 (r'tail -n', "don't use the '-n' option to tail, just use '-<num>'"),
113 113 (r'sha1sum', "don't use sha1sum, use $TESTDIR/md5sum.py"),
114 114 (r'\bls\b.*-\w*R', "don't use 'ls -R', use 'find'"),
115 115 (r'printf.*[^\\]\\([1-9]|0\d)', r"don't use 'printf \NNN', use Python"),
116 116 (r'printf.*[^\\]\\x', "don't use printf \\x, use Python"),
117 117 (r'\$\(.*\)', "don't use $(expr), use `expr`"),
118 118 (r'rm -rf \*', "don't use naked rm -rf, target a directory"),
119 119 (r'\[[^\]]+==', '[ foo == bar ] is a bashism, use [ foo = bar ] instead'),
120 120 (r'(^|\|\s*)grep (-\w\s+)*[^|]*[(|]\w',
121 121 "use egrep for extended grep syntax"),
122 122 (r'(^|\|\s*)e?grep .*\\S', "don't use \\S in regular expression"),
123 123 (r'(?<!!)/bin/', "don't use explicit paths for tools"),
124 124 (r'#!.*/bash', "don't use bash in shebang, use sh"),
125 125 (r'[^\n]\Z', "no trailing newline"),
126 126 (r'export .*=', "don't export and assign at once"),
127 127 (r'^source\b', "don't use 'source', use '.'"),
128 128 (r'touch -d', "don't use 'touch -d', use 'touch -t' instead"),
129 129 (r'\bls +[^|\n-]+ +-', "options to 'ls' must come before filenames"),
130 130 (r'[^>\n]>\s*\$HGRCPATH', "don't overwrite $HGRCPATH, append to it"),
131 131 (r'^stop\(\)', "don't use 'stop' as a shell function name"),
132 132 (r'(\[|\btest\b).*-e ', "don't use 'test -e', use 'test -f'"),
133 133 (r'\[\[\s+[^\]]*\]\]', "don't use '[[ ]]', use '[ ]'"),
134 134 (r'^alias\b.*=', "don't use alias, use a function"),
135 135 (r'if\s*!', "don't use '!' to negate exit status"),
136 136 (r'/dev/u?random', "don't use entropy, use /dev/zero"),
137 137 (r'do\s*true;\s*done', "don't use true as loop body, use sleep 0"),
138 138 (r'sed (-e )?\'(\d+|/[^/]*/)i(?!\\\n)',
139 139 "put a backslash-escaped newline after sed 'i' command"),
140 140 (r'^diff *-\w*[uU].*$\n(^ \$ |^$)', "prefix diff -u/-U with cmp"),
141 141 (r'^\s+(if)? diff *-\w*[uU]', "prefix diff -u/-U with cmp"),
142 142 (r'[\s="`\']python\s(?!bindings)', "don't use 'python', use '$PYTHON'"),
143 143 (r'seq ', "don't use 'seq', use $TESTDIR/seq.py"),
144 144 (r'\butil\.Abort\b', "directly use error.Abort"),
145 145 (r'\|&', "don't use |&, use 2>&1"),
146 146 (r'\w = +\w', "only one space after = allowed"),
147 147 (r'\bsed\b.*[^\\]\\n', "don't use 'sed ... \\n', use a \\ and a newline"),
148 148 (r'env.*-u', "don't use 'env -u VAR', use 'unset VAR'"),
149 149 (r'cp.* -r ', "don't use 'cp -r', use 'cp -R'"),
150 150 (r'grep.* -[ABC]', "don't use grep's context flags"),
151 151 (r'find.*-printf',
152 152 "don't use 'find -printf', it doesn't exist on BSD find(1)"),
153 153 (r'\$RANDOM ', "don't use bash-only $RANDOM to generate random values"),
154 154 ],
155 155 # warnings
156 156 [
157 157 (r'^function', "don't use 'function', use old style"),
158 158 (r'^diff.*-\w*N', "don't use 'diff -N'"),
159 159 (r'\$PWD|\${PWD}', "don't use $PWD, use `pwd`"),
160 160 (r'^([^"\'\n]|("[^"\n]*")|(\'[^\'\n]*\'))*\^', "^ must be quoted"),
161 161 (r'kill (`|\$\()', "don't use kill, use killdaemons.py")
162 162 ]
163 163 ]
164 164
165 165 testfilters = [
166 166 (r"( *)(#([^!][^\n]*\S)?)", repcomment),
167 167 (r"<<(\S+)((.|\n)*?\n\1)", rephere),
168 168 ]
169 169
170 170 uprefix = r"^ \$ "
171 171 utestpats = [
172 172 [
173 173 (r'^(\S.*|| [$>] \S.*)[ \t]\n', "trailing whitespace on non-output"),
174 174 (uprefix + r'.*\|\s*sed[^|>\n]*\n',
175 175 "use regex test output patterns instead of sed"),
176 176 (uprefix + r'(true|exit 0)', "explicit zero exit unnecessary"),
177 177 (uprefix + r'.*(?<!\[)\$\?', "explicit exit code checks unnecessary"),
178 178 (uprefix + r'.*\|\| echo.*(fail|error)',
179 179 "explicit exit code checks unnecessary"),
180 180 (uprefix + r'set -e', "don't use set -e"),
181 181 (uprefix + r'(\s|fi\b|done\b)', "use > for continued lines"),
182 182 (uprefix + r'.*:\.\S*/', "x:.y in a path does not work on msys, rewrite "
183 183 "as x://.y, or see `hg log -k msys` for alternatives", r'-\S+:\.|' #-Rxxx
184 184 '# no-msys'), # in test-pull.t which is skipped on windows
185 185 (r'^ [^$>].*27\.0\.0\.1',
186 186 'use $LOCALIP not an explicit loopback address'),
187 187 (r'^ (?![>$] ).*\$LOCALIP.*[^)]$',
188 188 'mark $LOCALIP output lines with (glob) to help tests in BSD jails'),
189 189 (r'^ (cat|find): .*: \$ENOENT\$',
190 190 'use test -f to test for file existence'),
191 191 (r'^ diff -[^ -]*p',
192 192 "don't use (external) diff with -p for portability"),
193 193 (r' readlink ', 'use readlink.py instead of readlink'),
194 194 (r'^ [-+][-+][-+] .* [-+]0000 \(glob\)',
195 195 "glob timezone field in diff output for portability"),
196 196 (r'^ @@ -[0-9]+ [+][0-9]+,[0-9]+ @@',
197 197 "use '@@ -N* +N,n @@ (glob)' style chunk header for portability"),
198 198 (r'^ @@ -[0-9]+,[0-9]+ [+][0-9]+ @@',
199 199 "use '@@ -N,n +N* @@ (glob)' style chunk header for portability"),
200 200 (r'^ @@ -[0-9]+ [+][0-9]+ @@',
201 201 "use '@@ -N* +N* @@ (glob)' style chunk header for portability"),
202 202 (uprefix + r'hg( +-[^ ]+( +[^ ]+)?)* +extdiff'
203 203 r'( +(-[^ po-]+|--(?!program|option)[^ ]+|[^-][^ ]*))*$',
204 204 "use $RUNTESTDIR/pdiff via extdiff (or -o/-p for false-positives)"),
205 205 ],
206 206 # warnings
207 207 [
208 208 (r'^ (?!.*\$LOCALIP)[^*?/\n]* \(glob\)$',
209 209 "glob match with no glob string (?, *, /, and $LOCALIP)"),
210 210 ]
211 211 ]
212 212
213 213 # transform plain test rules to unified test's
214 214 for i in [0, 1]:
215 215 for tp in testpats[i]:
216 216 p = tp[0]
217 217 m = tp[1]
218 218 if p.startswith(r'^'):
219 219 p = r"^ [$>] (%s)" % p[1:]
220 220 else:
221 221 p = r"^ [$>] .*(%s)" % p
222 222 utestpats[i].append((p, m) + tp[2:])
223 223
224 224 # don't transform the following rules:
225 225 # " > \t" and " \t" should be allowed in unified tests
226 226 testpats[0].append((r'^( *)\t', "don't use tabs to indent"))
227 227 utestpats[0].append((r'^( ?)\t', "don't use tabs to indent"))
228 228
229 229 utestfilters = [
230 230 (r"<<(\S+)((.|\n)*?\n > \1)", rephere),
231 231 (r"( +)(#([^!][^\n]*\S)?)", repcomment),
232 232 ]
233 233
234 234 # common patterns to check *.py
235 235 commonpypats = [
236 236 [
237 237 (r'\\$', 'Use () to wrap long lines in Python, not \\'),
238 238 (r'^\s*def\s*\w+\s*\(.*,\s*\(',
239 239 "tuple parameter unpacking not available in Python 3+"),
240 240 (r'lambda\s*\(.*,.*\)',
241 241 "tuple parameter unpacking not available in Python 3+"),
242 242 (r'(?<!def)\s+(cmp)\(', "cmp is not available in Python 3+"),
243 243 (r'(?<!\.)\breduce\s*\(.*', "reduce is not available in Python 3+"),
244 244 (r'\bdict\(.*=', 'dict() is different in Py2 and 3 and is slower than {}',
245 245 'dict-from-generator'),
246 246 (r'\.has_key\b', "dict.has_key is not available in Python 3+"),
247 247 (r'\s<>\s', '<> operator is not available in Python 3+, use !='),
248 248 (r'^\s*\t', "don't use tabs"),
249 249 (r'\S;\s*\n', "semicolon"),
250 250 (r'[^_]_\([ \t\n]*(?:"[^"]+"[ \t\n+]*)+%', "don't use % inside _()"),
251 251 (r"[^_]_\([ \t\n]*(?:'[^']+'[ \t\n+]*)+%", "don't use % inside _()"),
252 252 (r'(\w|\)),\w', "missing whitespace after ,"),
253 253 (r'(\w|\))[+/*\-<>]\w', "missing whitespace in expression"),
254 254 (r'^\s+(\w|\.)+=\w[^,()\n]*$', "missing whitespace in assignment"),
255 255 (r'\w\s=\s\s+\w', "gratuitous whitespace after ="),
256 256 ((
257 257 # a line ending with a colon, potentially with trailing comments
258 258 r':([ \t]*#[^\n]*)?\n'
259 259 # one that is not a pass and not only a comment
260 260 r'(?P<indent>[ \t]+)[^#][^\n]+\n'
261 261 # more lines at the same indent level
262 262 r'((?P=indent)[^\n]+\n)*'
263 263 # a pass at the same indent level, which is bogus
264 264 r'(?P=indent)pass[ \t\n#]'
265 265 ), 'omit superfluous pass'),
266 266 (r'[^\n]\Z', "no trailing newline"),
267 267 (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
268 268 # (r'^\s+[^_ \n][^_. \n]+_[^_\n]+\s*=',
269 269 # "don't use underbars in identifiers"),
270 270 (r'^\s+(self\.)?[A-Za-z][a-z0-9]+[A-Z]\w* = ',
271 271 "don't use camelcase in identifiers", r'#.*camelcase-required'),
272 272 (r'^\s*(if|while|def|class|except|try)\s[^[\n]*:\s*[^\\n]#\s]+',
273 273 "linebreak after :"),
274 274 (r'class\s[^( \n]+:', "old-style class, use class foo(object)",
275 275 r'#.*old-style'),
276 276 (r'class\s[^( \n]+\(\):',
277 277 "class foo() creates old style object, use class foo(object)",
278 278 r'#.*old-style'),
279 279 (r'\b(%s)\(' % '|'.join(k for k in keyword.kwlist
280 280 if k not in ('print', 'exec')),
281 281 "Python keyword is not a function"),
282 282 (r',]', "unneeded trailing ',' in list"),
283 283 # (r'class\s[A-Z][^\(]*\((?!Exception)',
284 284 # "don't capitalize non-exception classes"),
285 285 # (r'in range\(', "use xrange"),
286 286 # (r'^\s*print\s+', "avoid using print in core and extensions"),
287 287 (r'[\x80-\xff]', "non-ASCII character literal"),
288 288 (r'("\')\.format\(', "str.format() has no bytes counterpart, use %"),
289 289 (r'^\s*(%s)\s\s' % '|'.join(keyword.kwlist),
290 290 "gratuitous whitespace after Python keyword"),
291 291 (r'([\(\[][ \t]\S)|(\S[ \t][\)\]])', "gratuitous whitespace in () or []"),
292 292 # (r'\s\s=', "gratuitous whitespace before ="),
293 293 (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\S',
294 294 "missing whitespace around operator"),
295 295 (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\s',
296 296 "missing whitespace around operator"),
297 297 (r'\s(\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\S',
298 298 "missing whitespace around operator"),
299 299 (r'[^^+=*/!<>&| %-](\s=|=\s)[^= ]',
300 300 "wrong whitespace around ="),
301 301 (r'\([^()]*( =[^=]|[^<>!=]= )',
302 302 "no whitespace around = for named parameters"),
303 303 (r'raise [^,(]+, (\([^\)]+\)|[^,\(\)]+)$',
304 304 "don't use old-style two-argument raise, use Exception(message)"),
305 305 (r' is\s+(not\s+)?["\'0-9-]', "object comparison with literal"),
306 306 (r' [=!]=\s+(True|False|None)',
307 307 "comparison with singleton, use 'is' or 'is not' instead"),
308 308 (r'^\s*(while|if) [01]:',
309 309 "use True/False for constant Boolean expression"),
310 310 (r'^\s*if False(:| +and)', 'Remove code instead of using `if False`'),
311 311 (r'(?:(?<!def)\s+|\()hasattr\(',
312 312 'hasattr(foo, bar) is broken on py2, use util.safehasattr(foo, bar) '
313 313 'instead', r'#.*hasattr-py3-only'),
314 314 (r'opener\([^)]*\).read\(',
315 315 "use opener.read() instead"),
316 316 (r'opener\([^)]*\).write\(',
317 317 "use opener.write() instead"),
318 318 (r'(?i)descend[e]nt', "the proper spelling is descendAnt"),
319 319 (r'\.debug\(\_', "don't mark debug messages for translation"),
320 320 (r'\.strip\(\)\.split\(\)', "no need to strip before splitting"),
321 321 (r'^\s*except\s*:', "naked except clause", r'#.*re-raises'),
322 322 (r'^\s*except\s([^\(,]+|\([^\)]+\))\s*,',
323 323 'legacy exception syntax; use "as" instead of ","'),
324 324 (r'release\(.*wlock, .*lock\)', "wrong lock release order"),
325 325 (r'\bdef\s+__bool__\b', "__bool__ should be __nonzero__ in Python 2"),
326 326 (r'os\.path\.join\(.*, *(""|\'\')\)',
327 327 "use pathutil.normasprefix(path) instead of os.path.join(path, '')"),
328 328 (r'\s0[0-7]+\b', 'legacy octal syntax; use "0o" prefix instead of "0"'),
329 329 # XXX only catch mutable arguments on the first line of the definition
330 330 (r'def.*[( ]\w+=\{\}', "don't use mutable default arguments"),
331 331 (r'\butil\.Abort\b', "directly use error.Abort"),
332 332 (r'^@(\w*\.)?cachefunc', "module-level @cachefunc is risky, please avoid"),
333 333 (r'^import Queue', "don't use Queue, use pycompat.queue.Queue + "
334 334 "pycompat.queue.Empty"),
335 335 (r'^import cStringIO', "don't use cStringIO.StringIO, use util.stringio"),
336 336 (r'^import urllib', "don't use urllib, use util.urlreq/util.urlerr"),
337 337 (r'^import SocketServer', "don't use SockerServer, use util.socketserver"),
338 338 (r'^import urlparse', "don't use urlparse, use util.urlreq"),
339 339 (r'^import xmlrpclib', "don't use xmlrpclib, use util.xmlrpclib"),
340 340 (r'^import cPickle', "don't use cPickle, use util.pickle"),
341 341 (r'^import pickle', "don't use pickle, use util.pickle"),
342 342 (r'^import httplib', "don't use httplib, use util.httplib"),
343 343 (r'^import BaseHTTPServer', "use util.httpserver instead"),
344 344 (r'^(from|import) mercurial\.(cext|pure|cffi)',
345 345 "use mercurial.policy.importmod instead"),
346 346 (r'\.next\(\)', "don't use .next(), use next(...)"),
347 347 (r'([a-z]*).revision\(\1\.node\(',
348 348 "don't convert rev to node before passing to revision(nodeorrev)"),
349 349 (r'platform\.system\(\)', "don't use platform.system(), use pycompat"),
350 350
351 351 ],
352 352 # warnings
353 353 [
354 354 ]
355 355 ]
356 356
357 357 # patterns to check normal *.py files
358 358 pypats = [
359 359 [
360 360 # Ideally, these should be placed in "commonpypats" for
361 361 # consistency of coding rules in Mercurial source tree.
362 362 # But on the other hand, these are not so seriously required for
363 363 # python code fragments embedded in test scripts. Fixing test
364 364 # scripts for these patterns requires many changes, and has less
365 365 # profit than effort.
366 366 (r'.{81}', "line too long"),
367 367 (r'raise Exception', "don't raise generic exceptions"),
368 368 (r'[\s\(](open|file)\([^)]*\)\.read\(',
369 369 "use util.readfile() instead"),
370 370 (r'[\s\(](open|file)\([^)]*\)\.write\(',
371 371 "use util.writefile() instead"),
372 372 (r'^[\s\(]*(open(er)?|file)\([^)]*\)(?!\.close\(\))',
373 373 "always assign an opened file to a variable, and close it afterwards"),
374 374 (r'[\s\(](open|file)\([^)]*\)\.(?!close\(\))',
375 375 "always assign an opened file to a variable, and close it afterwards"),
376 376 (r':\n( )*( ){1,3}[^ ]', "must indent 4 spaces"),
377 377 (r'^import atexit', "don't use atexit, use ui.atexit"),
378 378
379 379 # rules depending on implementation of repquote()
380 380 (r' x+[xpqo%APM][\'"]\n\s+[\'"]x',
381 381 'string join across lines with no space'),
382 382 (r'''(?x)ui\.(status|progress|write|note|warn)\(
383 383 [ \t\n#]*
384 384 (?# any strings/comments might precede a string, which
385 385 # contains translatable message)
386 386 ((['"]|\'\'\'|""")[ \npq%bAPMxno]*(['"]|\'\'\'|""")[ \t\n#]+)*
387 387 (?# sequence consisting of below might precede translatable message
388 388 # - formatting string: "% 10s", "%05d", "% -3.2f", "%*s", "%%" ...
389 389 # - escaped character: "\\", "\n", "\0" ...
390 390 # - character other than '%', 'b' as '\', and 'x' as alphabet)
391 391 (['"]|\'\'\'|""")
392 392 ((%([ n]?[PM]?([np]+|A))?x)|%%|b[bnx]|[ \nnpqAPMo])*x
393 393 (?# this regexp can't use [^...] style,
394 394 # because _preparepats forcibly adds "\n" into [^...],
395 395 # even though this regexp wants match it against "\n")''',
396 396 "missing _() in ui message (use () to hide false-positives)"),
397 397 ] + commonpypats[0],
398 398 # warnings
399 399 [
400 400 # rules depending on implementation of repquote()
401 401 (r'(^| )pp +xxxxqq[ \n][^\n]', "add two newlines after '.. note::'"),
402 402 ] + commonpypats[1]
403 403 ]
404 404
405 405 # common filters to convert *.py
406 406 commonpyfilters = [
407 407 (r"""(?msx)(?P<comment>\#.*?$)|
408 408 ((?P<quote>('''|\"\"\"|(?<!')'(?!')|(?<!")"(?!")))
409 409 (?P<text>(([^\\]|\\.)*?))
410 410 (?P=quote))""", reppython),
411 411 ]
412 412
413 413 # filters to convert normal *.py files
414 414 pyfilters = [
415 415 ] + commonpyfilters
416 416
417 417 # non-filter patterns
418 418 pynfpats = [
419 419 [
420 420 (r'pycompat\.osname\s*[=!]=\s*[\'"]nt[\'"]', "use pycompat.iswindows"),
421 421 (r'pycompat\.osname\s*[=!]=\s*[\'"]posix[\'"]', "use pycompat.isposix"),
422 422 (r'pycompat\.sysplatform\s*[!=]=\s*[\'"]darwin[\'"]',
423 423 "use pycompat.isdarwin"),
424 424 ],
425 425 # warnings
426 426 [],
427 427 ]
428 428
429 429 # extension non-filter patterns
430 430 pyextnfpats = [
431 431 [(r'^"""\n?[A-Z]', "don't capitalize docstring title")],
432 432 # warnings
433 433 [],
434 434 ]
435 435
436 436 txtfilters = []
437 437
438 438 txtpats = [
439 439 [
440 440 (r'\s$', 'trailing whitespace'),
441 441 ('.. note::[ \n][^\n]', 'add two newlines after note::')
442 442 ],
443 443 []
444 444 ]
445 445
446 446 cpats = [
447 447 [
448 448 (r'//', "don't use //-style comments"),
449 449 (r'\S\t', "don't use tabs except for indent"),
450 450 (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
451 451 (r'.{81}', "line too long"),
452 452 (r'(while|if|do|for)\(', "use space after while/if/do/for"),
453 453 (r'return\(', "return is not a function"),
454 454 (r' ;', "no space before ;"),
455 455 (r'[^;] \)', "no space before )"),
456 456 (r'[)][{]', "space between ) and {"),
457 457 (r'\w+\* \w+', "use int *foo, not int* foo"),
458 458 (r'\W\([^\)]+\) \w+', "use (int)foo, not (int) foo"),
459 459 (r'\w+ (\+\+|--)', "use foo++, not foo ++"),
460 460 (r'\w,\w', "missing whitespace after ,"),
461 461 (r'^[^#]\w[+/*]\w', "missing whitespace in expression"),
462 462 (r'\w\s=\s\s+\w', "gratuitous whitespace after ="),
463 463 (r'^#\s+\w', "use #foo, not # foo"),
464 464 (r'[^\n]\Z', "no trailing newline"),
465 465 (r'^\s*#import\b', "use only #include in standard C code"),
466 466 (r'strcpy\(', "don't use strcpy, use strlcpy or memcpy"),
467 467 (r'strcat\(', "don't use strcat"),
468 468
469 469 # rules depending on implementation of repquote()
470 470 ],
471 471 # warnings
472 472 [
473 473 # rules depending on implementation of repquote()
474 474 ]
475 475 ]
476 476
477 477 cfilters = [
478 478 (r'(/\*)(((\*(?!/))|[^*])*)\*/', repccomment),
479 479 (r'''(?P<quote>(?<!")")(?P<text>([^"]|\\")+)"(?!")''', repquote),
480 480 (r'''(#\s*include\s+<)([^>]+)>''', repinclude),
481 481 (r'(\()([^)]+\))', repcallspaces),
482 482 ]
483 483
484 484 inutilpats = [
485 485 [
486 486 (r'\bui\.', "don't use ui in util"),
487 487 ],
488 488 # warnings
489 489 []
490 490 ]
491 491
492 492 inrevlogpats = [
493 493 [
494 494 (r'\brepo\.', "don't use repo in revlog"),
495 495 ],
496 496 # warnings
497 497 []
498 498 ]
499 499
500 500 webtemplatefilters = []
501 501
502 502 webtemplatepats = [
503 503 [],
504 504 [
505 505 (r'{desc(\|(?!websub|firstline)[^\|]*)+}',
506 506 'follow desc keyword with either firstline or websub'),
507 507 ]
508 508 ]
509 509
510 510 allfilesfilters = []
511 511
512 512 allfilespats = [
513 513 [
514 514 (r'(http|https)://[a-zA-Z0-9./]*selenic.com/',
515 515 'use mercurial-scm.org domain URL'),
516 516 (r'mercurial@selenic\.com',
517 517 'use mercurial-scm.org domain for mercurial ML address'),
518 518 (r'mercurial-devel@selenic\.com',
519 519 'use mercurial-scm.org domain for mercurial-devel ML address'),
520 520 ],
521 521 # warnings
522 522 [],
523 523 ]
524 524
525 525 py3pats = [
526 526 [
527 527 (r'os\.environ', "use encoding.environ instead (py3)", r'#.*re-exports'),
528 528 (r'os\.name', "use pycompat.osname instead (py3)"),
529 529 (r'os\.getcwd', "use encoding.getcwd instead (py3)", r'#.*re-exports'),
530 530 (r'os\.sep', "use pycompat.ossep instead (py3)"),
531 531 (r'os\.pathsep', "use pycompat.ospathsep instead (py3)"),
532 532 (r'os\.altsep', "use pycompat.osaltsep instead (py3)"),
533 533 (r'sys\.platform', "use pycompat.sysplatform instead (py3)"),
534 534 (r'getopt\.getopt', "use pycompat.getoptb instead (py3)"),
535 535 (r'os\.getenv', "use encoding.environ.get instead"),
536 536 (r'os\.setenv', "modifying the environ dict is not preferred"),
537 537 (r'(?<!pycompat\.)xrange', "use pycompat.xrange instead (py3)"),
538 538 ],
539 539 # warnings
540 540 [],
541 541 ]
542 542
543 543 checks = [
544 544 ('python', r'.*\.(py|cgi)$', r'^#!.*python', pyfilters, pypats),
545 545 ('python', r'.*\.(py|cgi)$', r'^#!.*python', [], pynfpats),
546 546 ('python', r'.*hgext.*\.py$', '', [], pyextnfpats),
547 547 ('python 3', r'.*(hgext|mercurial)/(?!demandimport|policy|pycompat).*\.py',
548 548 '', pyfilters, py3pats),
549 549 ('test script', r'(.*/)?test-[^.~]*$', '', testfilters, testpats),
550 550 ('c', r'.*\.[ch]$', '', cfilters, cpats),
551 551 ('unified test', r'.*\.t$', '', utestfilters, utestpats),
552 552 ('layering violation repo in revlog', r'mercurial/revlog\.py', '',
553 553 pyfilters, inrevlogpats),
554 554 ('layering violation ui in util', r'mercurial/util\.py', '', pyfilters,
555 555 inutilpats),
556 556 ('txt', r'.*\.txt$', '', txtfilters, txtpats),
557 557 ('web template', r'mercurial/templates/.*\.tmpl', '',
558 558 webtemplatefilters, webtemplatepats),
559 559 ('all except for .po', r'.*(?<!\.po)$', '',
560 560 allfilesfilters, allfilespats),
561 561 ]
562 562
def _preparepats():
    """Compile all patterns and filters listed in ``checks``, in place

    For each entry of ``checks``, the last element is taken as the
    [errors, warnings] pattern lists and the one before it as the filter
    list. Pattern and filter sources are replaced by compiled regular
    expression objects inside those lists.
    """
    def preparefailandwarn(failandwarn):
        # failandwarn is a sequence of pattern lists; each pattern entry
        # is a tuple starting with the regexp source
        for pats in failandwarn:
            for i, pseq in enumerate(pats):
                # fix-up regexes for multi-line searches
                p = pseq[0]
                # \s doesn't match \n (done in two steps)
                # first, we replace \s that appears in a set already
                p = re.sub(r'\[\\s', r'[ \\t', p)
                # now we replace other \s instances.
                p = re.sub(r'(?<!(\\|\[))\\s', r'[ \\t]', p)
                # [^...] doesn't match newline
                p = re.sub(r'(?<!\\)\[\^', r'[^\\n', p)

                # keep the rest of the entry (message, ignore pattern)
                pats[i] = (re.compile(p, re.MULTILINE),) + pseq[1:]

    def preparefilters(filters):
        # each filter is a (regexp, replacement) pair
        for i, flt in enumerate(filters):
            filters[i] = re.compile(flt[0]), flt[1]

    for cs in (checks,):
        for c in cs:
            failandwarn = c[-1]
            preparefailandwarn(failandwarn)

            filters = c[-2]
            preparefilters(filters)
590 590
class norepeatlogger(object):
    """Error reporter which prints each faulty line only once"""

    def __init__(self):
        # (fname, lineno, line) of the most recently reported line
        self._lastseen = None

    def log(self, fname, lineno, line, msg, blame):
        """print error related to a given line of a given file.

        The faulty line will also be printed but only once in the case
        of multiple errors.

        :fname: filename
        :lineno: line number
        :line: actual content of the line
        :msg: error message
        :blame: blame information shown next to the line number, or an
                empty value to omit it
        """
        msgid = fname, lineno, line
        if msgid != self._lastseen:
            # new location: print its header and content before the message
            if blame:
                print("%s:%d (%s):" % (fname, lineno, blame))
            else:
                print("%s:%d:" % (fname, lineno))
            print(" > %s" % line)
            self._lastseen = msgid
        print(" " + msg)

_defaultlogger = norepeatlogger()
617 617
def getblame(f):
    """Return annotate information of the file ``f``

    Runs "hg annotate -un" on ``f`` and returns a list holding one
    (linecontent, user, rev) tuple per output line, in output order.
    """
    lines = []
    # NOTE(review): ``f`` is put into the shell command without quoting,
    # so a file name containing blanks or shell meta characters will not
    # be passed to hg as a single argument.
    pipe = os.popen('hg annotate -un %s' % f)
    try:
        for l in pipe:
            # each line is expected to look like "<user> <rev>:<content>"
            start, line = l.split(':', 1)
            user, rev = start.split()
            # drop the first and the last character (presumably the
            # blank after ':' and the end of line)
            lines.append((line[1:-1], user, rev))
    finally:
        # close the pipe explicitly instead of leaving it to the GC
        pipe.close()
    return lines
625 625
def checkfile(f, logfunc=_defaultlogger.log, maxerr=None, warnings=False,
              blame=False, debug=False, lineno=True):
    """checks style and portability of a given file

    :f: filepath
    :logfunc: function used to report error
              logfunc(filename, linenumber, linecontent, errormessage,
              blameinfo)
    :maxerr: number of error to display before aborting.
             Leave it None (default) to report all errors
    :warnings: whether warning level checks should be applied
    :blame: whether blame information should be displayed at error reporting
    :debug: whether debug information should be displayed
    :lineno: whether lineno should be displayed at error reporting

    return True if no error is found, False otherwise.

    A file which can not be read is reported on stdout and treated as
    having no error. For a file containing the no-check-code marker,
    the (true) string "Skip" is returned instead.
    """
    result = True

    try:
        with opentext(f) as fp:
            try:
                pre = fp.read()
            except UnicodeDecodeError as e:
                print("%s while reading %s" % (e, f))
                return result
    except IOError as e:
        print("Skipping %s, %s" % (f, str(e).split(':', 1)[0]))
        return result

    # context information shared while single checkfile() invocation
    context = {'blamecache': None}

    for name, match, magic, filters, pats in checks:
        if debug:
            print(name, f)
        # a check applies if the file name matches, or if the check has
        # a magic pattern which is found in the file content
        if not (re.match(match, f) or (magic and re.search(magic, pre))):
            if debug:
                # arguments follow the wording of the message:
                # check name, file, and the pattern it failed to match
                print("Skipping %s for %s it doesn't match %s" % (
                    name, f, match))
            continue
        if "no-" "check-code" in pre:
            # If you're looking at this line, it's because a file has:
            # no- check- code
            # but the reason to output skipping is to make life for
            # tests easier. So, instead of writing it with a normal
            # spelling, we write it with the expected spelling from
            # tests/test-check-code.t
            print("Skipping %s it has no-che?k-code (glob)" % f)
            return "Skip" # skip checking this file

        if not _checkfiledata(name, f, pre, filters, pats, context,
                              logfunc, maxerr, warnings, blame, debug, lineno):
            result = False

    return result
677
def _checkfiledata(name, f, filedata, filters, pats, context,
                   logfunc, maxerr, warnings, blame, debug, lineno):
    """Execute actual error check for file data

    :name: of the checking category
    :f: filepath
    :filedata: content of a file
    :filters: to be applied before checking
    :pats: to detect errors
    :context: a dict of information shared while single checkfile() invocation
              Valid keys: 'blamecache'.
    :logfunc: function used to report error
              logfunc(filename, linenumber, linecontent, errormessage,
              blameinfo)
    :maxerr: number of error to display before aborting, or False to
             report all errors
    :warnings: whether warning level checks should be applied
    :blame: whether blame information should be displayed at error reporting
    :debug: whether debug information should be displayed
    :lineno: whether lineno should be displayed at error reporting

    return True if no error is found, False otherwise.
    """
    blamecache = context['blamecache']

    fc = 0
    # patterns are matched against the filtered text ("post"), but the
    # reported line content is taken from the unfiltered one ("pre")
    pre = post = filedata
    result = True

    for p, r in filters:
        post = re.sub(p, r, post)
    nerrs = len(pats[0]) # nerr elements are errors
    if warnings:
        pats = pats[0] + pats[1]
    else:
        pats = pats[0]
    # print post # uncomment to show filtered version

    if debug:
        print("Checking %s for %s" % (name, f))

    prelines = None
    errors = []
    for i, pat in enumerate(pats):
        if len(pat) == 3:
            p, msg, ignore = pat
        else:
            p, msg = pat
            ignore = None
        if i >= nerrs:
            msg = "warning: " + msg

        # pos is the offset at which line n of the filtered text starts;
        # both only move forward while walking the matches of a pattern
        pos = 0
        n = 0
        for m in p.finditer(post):
            if prelines is None:
                # split lazily, only once a pattern actually matches
                prelines = pre.splitlines()
                postlines = post.splitlines(True)

            # advance to the line containing the start of the match
            start = m.start()
            while n < len(postlines):
                step = len(postlines[n])
                if pos + step > start:
                    break
                pos += step
                n += 1
            l = prelines[n]

            if ignore and re.search(ignore, l, re.MULTILINE):
                if debug:
                    print("Skipping %s for %s:%s (ignore pattern)" % (
                        name, f, n))
                continue
            bd = ""
            if blame:
                bd = 'working directory'
                if blamecache is None:
                    # fetched at most once per checkfile() invocation,
                    # and shared through the context
                    blamecache = getblame(f)
                    context['blamecache'] = blamecache
                if n < len(blamecache):
                    bl, bu, br = blamecache[n]
                    # trust the annotation only if the line is unchanged
                    if bl == l:
                        bd = '%s@%s' % (bu, br)

            errors.append((f, lineno and n + 1, l, msg, bd))
            result = False

    errors.sort()
    for e in errors:
        logfunc(*e)
        fc += 1
        if maxerr and fc >= maxerr:
            print(" (too many errors, giving up)")
            break

    return result
737 774
def main():
    """Check files given on the command line, and return the exit code

    Files to check are taken from the arguments. Without any argument,
    everything matching "*" in the current directory is checked. With
    the single argument "-", the file list is read from stdin, one name
    per line.

    return 1 if checkfile() reported an error for any file, 0 otherwise.
    """
    parser = optparse.OptionParser("%prog [options] [files | -]")
    parser.add_option("-w", "--warnings", action="store_true",
                      help="include warning-level checks")
    parser.add_option("-p", "--per-file", type="int",
                      help="max warnings per file")
    parser.add_option("-b", "--blame", action="store_true",
                      help="use annotate to generate blame info")
    parser.add_option("", "--debug", action="store_true",
                      help="show debug information")
    parser.add_option("", "--nolineno", action="store_false",
                      dest='lineno', help="don't show line numbers")

    parser.set_defaults(per_file=15, warnings=False, blame=False, debug=False,
                        lineno=True)
    (options, args) = parser.parse_args()

    if len(args) == 0:
        check = glob.glob("*")
    elif args == ['-']:
        # read file list from stdin
        check = sys.stdin.read().splitlines()
    else:
        check = args

    # patterns must be compiled before the first checkfile() call
    _preparepats()

    ret = 0
    for f in check:
        if not checkfile(f, maxerr=options.per_file, warnings=options.warnings,
                         blame=options.blame, debug=options.debug,
                         lineno=options.lineno):
            ret = 1
    return ret

if __name__ == "__main__":
    sys.exit(main())
General Comments 0
You need to be logged in to leave comments. Login now