##// END OF EJS Templates
contrib: fix a subtle bug in check-code's regex rewriting...
Augie Fackler -
r36975:a8d540d2 default
parent child Browse files
Show More
@@ -1,738 +1,741
1 1 #!/usr/bin/env python
2 2 #
3 3 # check-code - a style and portability checker for Mercurial
4 4 #
5 5 # Copyright 2010 Matt Mackall <mpm@selenic.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """style and portability checker for Mercurial
11 11
12 12 when a rule triggers wrong, do one of the following (prefer one from top):
13 13 * do the work-around the rule suggests
14 14 * doublecheck that it is a false match
15 15 * improve the rule pattern
16 16 * add an ignore pattern to the rule (3rd arg) which matches your good line
17 17 (you can append a short comment and match this, like: #re-raises)
18 18 * change the pattern to a warning and list the exception in test-check-code-hg
19 19 * ONLY use no--check-code for skipping entire files from external sources
20 20 """
21 21
22 22 from __future__ import absolute_import, print_function
23 23 import glob
24 24 import keyword
25 25 import optparse
26 26 import os
27 27 import re
28 28 import sys
# Text opener: plain open() on Python 2, ASCII-decoding open() on Python 3.
if sys.version_info[0] >= 3:
    def opentext(f):
        """Open ``f`` for reading as ASCII-encoded text."""
        return open(f, encoding='ascii')
else:
    opentext = open

# Python 3 has no xrange(); alias it to range() there.
try:
    xrange
except NameError:
    xrange = range

# re2 is optional; the name is bound to None when it cannot be imported.
try:
    import re2
except ImportError:
    re2 = None
42 42
def compilere(pat, multiline=False):
    """Compile ``pat``, preferring re2 and falling back to ``re``.

    :pat: regular expression source
    :multiline: when true, the inline ``(?m)`` flag is prepended to ``pat``

    re2 is only tried when the module is available; if it rejects the
    pattern with ``re2.error`` the standard ``re`` module is used instead.
    """
    source = '(?m)' + pat if multiline else pat
    compiled = None
    if re2:
        try:
            compiled = re2.compile(source)
        except re2.error:
            compiled = None
    if compiled is None:
        compiled = re.compile(source)
    return compiled
52 52
# check "rules depending on implementation of repquote()" in each
# patterns (especially pypats), before changing around repquote()
_repquotefixedmap = {
    ' ': ' ',
    '\n': '\n',
    '.': 'p',
    ':': 'q',
    '%': '%',
    '\\': 'b',
    '*': 'A',
    '+': 'P',
    '-': 'M',
}

def _repquoteencodechr(i):
    """Return the one-character placeholder for character code ``i``.

    Characters listed in _repquotefixedmap keep their fixed placeholder;
    otherwise letters become 'x', digits 'n', anything else 'o', and codes
    above 255 become 'u'.
    """
    if i > 255:
        return 'u'
    ch = chr(i)
    fixed = _repquotefixedmap.get(ch)
    if fixed is not None:
        return fixed
    if ch.isalpha():
        return 'x'
    return 'n' if ch.isdigit() else 'o'

# translation table with one placeholder per character code 0..255
_repquotett = ''.join(map(_repquoteencodechr, range(256)))

def repquote(m):
    """Return the matched string literal with its content masked.

    ``m`` must provide the named groups 'quote' and 'text'; the text is
    translated through _repquotett and re-wrapped in the original quotes.
    """
    quote = m.group('quote')
    masked = m.group('text').translate(_repquotett)
    return quote + masked + quote
74 74
def reppython(m):
    """Mask a Python comment or string literal matched by ``m``.

    When the 'comment' group matched, every character up to the last
    non-blank one is replaced by '#' and trailing whitespace is kept.
    Otherwise the match is handed to repquote().
    """
    comment = m.group('comment')
    if not comment:
        return repquote(m)
    keep = len(comment.rstrip())
    return "#" * keep + comment[keep:]
81 81
def repcomment(m):
    """Return group 1 unchanged followed by group 2 overwritten with '#'."""
    lead, comment = m.group(1, 2)
    return lead + "#" * len(comment)
84 84
def repccomment(m):
    """Mask the body of a C comment while keeping its layout.

    Group 1 is emitted as is; in group 2 every non-whitespace character,
    and a space directly following a newline, becomes 'x'. A closing
    "*/" is appended.
    """
    opener, body = m.group(1), m.group(2)
    masked = re.sub(r"((?<=\n) )|\S", "x", body)
    return ''.join((opener, masked, "*/"))
88 88
def repcallspaces(m):
    """Strip the whitespace that follows each newline inside group 2."""
    head, rest = m.group(1, 2)
    return head + re.sub(r"\n\s+", "\n", rest)
92 92
def repinclude(m):
    """Return group 1 followed by the fixed placeholder "<foo>"."""
    return ''.join([m.group(1), "<foo>"])
95 95
def rephere(m):
    """Return group 1 plus group 2 with non-whitespace replaced by 'x'."""
    marker, body = m.group(1, 2)
    return marker + re.sub(r"\S", "x", body)
99 99
100 100
# Rules for plain shell test scripts.
#
# Layout shared by every *pats table in this file: a two-element list
# [errors, warnings]; each rule is a tuple (regex, message) with an
# optional third element, a regex that suppresses the rule when it
# matches the offending source line (see checkfile()).
testpats = [
  [
    (r'\b(push|pop)d\b', "don't use 'pushd' or 'popd', use 'cd'"),
    (r'\W\$?\(\([^\)\n]*\)\)', "don't use (()) or $(()), use 'expr'"),
    (r'grep.*-q', "don't use 'grep -q', redirect to /dev/null"),
    (r'(?<!hg )grep.* -a', "don't use 'grep -a', use in-line python"),
    (r'sed.*-i', "don't use 'sed -i', use a temporary file"),
    (r'\becho\b.*\\n', "don't use 'echo \\n', use printf"),
    (r'echo -n', "don't use 'echo -n', use printf"),
    (r'(^|\|\s*)\bwc\b[^|]*$\n(?!.*\(re\))', "filter wc output"),
    (r'head -c', "don't use 'head -c', use 'dd'"),
    (r'tail -n', "don't use the '-n' option to tail, just use '-<num>'"),
    (r'sha1sum', "don't use sha1sum, use $TESTDIR/md5sum.py"),
    (r'ls.*-\w*R', "don't use 'ls -R', use 'find'"),
    (r'printf.*[^\\]\\([1-9]|0\d)', r"don't use 'printf \NNN', use Python"),
    (r'printf.*[^\\]\\x', "don't use printf \\x, use Python"),
    (r'\$\(.*\)', "don't use $(expr), use `expr`"),
    (r'rm -rf \*', "don't use naked rm -rf, target a directory"),
    (r'\[[^\]]+==', '[ foo == bar ] is a bashism, use [ foo = bar ] instead'),
    (r'(^|\|\s*)grep (-\w\s+)*[^|]*[(|]\w',
     "use egrep for extended grep syntax"),
    (r'(^|\|\s*)e?grep .*\\S', "don't use \\S in regular expression"),
    (r'(?<!!)/bin/', "don't use explicit paths for tools"),
    (r'#!.*/bash', "don't use bash in shebang, use sh"),
    (r'[^\n]\Z', "no trailing newline"),
    (r'export .*=', "don't export and assign at once"),
    (r'^source\b', "don't use 'source', use '.'"),
    (r'touch -d', "don't use 'touch -d', use 'touch -t' instead"),
    (r'\bls +[^|\n-]+ +-', "options to 'ls' must come before filenames"),
    (r'[^>\n]>\s*\$HGRCPATH', "don't overwrite $HGRCPATH, append to it"),
    (r'^stop\(\)', "don't use 'stop' as a shell function name"),
    (r'(\[|\btest\b).*-e ', "don't use 'test -e', use 'test -f'"),
    (r'\[\[\s+[^\]]*\]\]', "don't use '[[ ]]', use '[ ]'"),
    (r'^alias\b.*=', "don't use alias, use a function"),
    (r'if\s*!', "don't use '!' to negate exit status"),
    (r'/dev/u?random', "don't use entropy, use /dev/zero"),
    (r'do\s*true;\s*done', "don't use true as loop body, use sleep 0"),
    (r'sed (-e )?\'(\d+|/[^/]*/)i(?!\\\n)',
     "put a backslash-escaped newline after sed 'i' command"),
    (r'^diff *-\w*[uU].*$\n(^ \$ |^$)', "prefix diff -u/-U with cmp"),
    (r'^\s+(if)? diff *-\w*[uU]', "prefix diff -u/-U with cmp"),
    (r'[\s="`\']python\s(?!bindings)', "don't use 'python', use '$PYTHON'"),
    (r'seq ', "don't use 'seq', use $TESTDIR/seq.py"),
    (r'\butil\.Abort\b', "directly use error.Abort"),
    (r'\|&', "don't use |&, use 2>&1"),
    (r'\w = +\w', "only one space after = allowed"),
    (r'\bsed\b.*[^\\]\\n', "don't use 'sed ... \\n', use a \\ and a newline"),
    (r'env.*-u', "don't use 'env -u VAR', use 'unset VAR'"),
    (r'cp.* -r ', "don't use 'cp -r', use 'cp -R'"),
    (r'grep.* -[ABC]', "don't use grep's context flags"),
    (r'find.*-printf',
     "don't use 'find -printf', it doesn't exist on BSD find(1)"),
    (r'\$RANDOM ', "don't use bash-only $RANDOM to generate random values"),
  ],
  # warnings
  [
    (r'^function', "don't use 'function', use old style"),
    (r'^diff.*-\w*N', "don't use 'diff -N'"),
    (r'\$PWD|\${PWD}', "don't use $PWD, use `pwd`"),
    (r'^([^"\'\n]|("[^"\n]*")|(\'[^\'\n]*\'))*\^', "^ must be quoted"),
    (r'kill (`|\$\()', "don't use kill, use killdaemons.py")
  ]
]
164 164
# Filters for plain test scripts: (regex, replacement function) pairs that
# checkfile() applies with re.sub() before the rules are evaluated.
testfilters = [
    # shell comments (but not "#!" lines) are overwritten with '#'
    (r"( *)(#([^!][^\n]*\S)?)", repcomment),
    # here-document bodies are masked with 'x'
    (r"<<(\S+)((.|\n)*?\n\1)", rephere),
]
169 169
# Prefix shared by the unified-test rules below that target command lines.
uprefix = r"^ \$ "
# Rules for unified tests (*.t): [errors, warnings], each rule being
# (regex, message[, ignore-regex]). The plain test-script rules are
# appended to this table further down in the file.
utestpats = [
  [
    (r'^(\S.*|| [$>] \S.*)[ \t]\n', "trailing whitespace on non-output"),
    (uprefix + r'.*\|\s*sed[^|>\n]*\n',
     "use regex test output patterns instead of sed"),
    (uprefix + r'(true|exit 0)', "explicit zero exit unnecessary"),
    (uprefix + r'.*(?<!\[)\$\?', "explicit exit code checks unnecessary"),
    (uprefix + r'.*\|\| echo.*(fail|error)',
     "explicit exit code checks unnecessary"),
    (uprefix + r'set -e', "don't use set -e"),
    (uprefix + r'(\s|fi\b|done\b)', "use > for continued lines"),
    (uprefix + r'.*:\.\S*/', "x:.y in a path does not work on msys, rewrite "
     "as x://.y, or see `hg log -k msys` for alternatives", r'-\S+:\.|' #-Rxxx
     '# no-msys'), # in test-pull.t which is skipped on windows
    (r'^ [^$>].*27\.0\.0\.1',
     'use $LOCALIP not an explicit loopback address'),
    (r'^ (?![>$] ).*\$LOCALIP.*[^)]$',
     'mark $LOCALIP output lines with (glob) to help tests in BSD jails'),
    (r'^ (cat|find): .*: \$ENOENT\$',
     'use test -f to test for file existence'),
    (r'^ diff -[^ -]*p',
     "don't use (external) diff with -p for portability"),
    (r' readlink ', 'use readlink.py instead of readlink'),
    (r'^ [-+][-+][-+] .* [-+]0000 \(glob\)',
     "glob timezone field in diff output for portability"),
    (r'^ @@ -[0-9]+ [+][0-9]+,[0-9]+ @@',
     "use '@@ -N* +N,n @@ (glob)' style chunk header for portability"),
    (r'^ @@ -[0-9]+,[0-9]+ [+][0-9]+ @@',
     "use '@@ -N,n +N* @@ (glob)' style chunk header for portability"),
    (r'^ @@ -[0-9]+ [+][0-9]+ @@',
     "use '@@ -N* +N* @@ (glob)' style chunk header for portability"),
    (uprefix + r'hg( +-[^ ]+( +[^ ]+)?)* +extdiff'
     r'( +(-[^ po-]+|--(?!program|option)[^ ]+|[^-][^ ]*))*$',
     "use $RUNTESTDIR/pdiff via extdiff (or -o/-p for false-positives)"),
  ],
  # warnings
  [
    (r'^ (?!.*\$LOCALIP)[^*?/\n]* \(glob\)$',
     "glob match with no glob string (?, *, /, and $LOCALIP)"),
  ]
]
212 212
# transform plain test rules to unified test's
# (runs at import time; index 0 holds the errors, index 1 the warnings)
for i in [0, 1]:
    for tp in testpats[i]:
        p = tp[0]
        m = tp[1]
        # an anchored rule must match right after the " $ "/" > " prefix,
        # an unanchored one may match anywhere on such a line
        if p.startswith(r'^'):
            p = r"^ [$>] (%s)" % p[1:]
        else:
            p = r"^ [$>] .*(%s)" % p
        # tp[2:] carries the optional ignore pattern over unchanged
        utestpats[i].append((p, m) + tp[2:])

# don't transform the following rules:
# " > \t" and " \t" should be allowed in unified tests
# (appended only after the loop above so they are not rewritten by it)
testpats[0].append((r'^( *)\t', "don't use tabs to indent"))
utestpats[0].append((r'^( ?)\t', "don't use tabs to indent"))
228 228
# Filters for unified tests: (regex, replacement function) pairs that
# checkfile() applies with re.sub() before the rules are evaluated.
utestfilters = [
    # here-document bodies (terminator on a " > " line) are masked with 'x'
    (r"<<(\S+)((.|\n)*?\n > \1)", rephere),
    # comments preceded by at least one space are overwritten with '#'
    (r"( +)(#([^!][^\n]*\S)?)", repcomment),
]
233 233
# Rules for Python sources: [errors, warnings]; each rule is
# (regex, message[, ignore-regex]) where the optional ignore-regex
# suppresses the rule when it matches the offending source line.
#
# These rules are matched against text already rewritten by pyfilters, so
# string contents and comments appear in their masked form (see repquote()).
pypats = [
  [
    (r'^\s*def\s*\w+\s*\(.*,\s*\(',
     "tuple parameter unpacking not available in Python 3+"),
    (r'lambda\s*\(.*,.*\)',
     "tuple parameter unpacking not available in Python 3+"),
    (r'(?<!def)\s+(cmp)\(', "cmp is not available in Python 3+"),
    (r'(?<!\.)\breduce\s*\(.*', "reduce is not available in Python 3+"),
    (r'\bdict\(.*=', 'dict() is different in Py2 and 3 and is slower than {}',
     'dict-from-generator'),
    (r'\.has_key\b', "dict.has_key is not available in Python 3+"),
    (r'\s<>\s', '<> operator is not available in Python 3+, use !='),
    (r'^\s*\t', "don't use tabs"),
    (r'\S;\s*\n', "semicolon"),
    (r'[^_]_\([ \t\n]*(?:"[^"]+"[ \t\n+]*)+%', "don't use % inside _()"),
    (r"[^_]_\([ \t\n]*(?:'[^']+'[ \t\n+]*)+%", "don't use % inside _()"),
    (r'(\w|\)),\w', "missing whitespace after ,"),
    (r'(\w|\))[+/*\-<>]\w', "missing whitespace in expression"),
    (r'^\s+(\w|\.)+=\w[^,()\n]*$', "missing whitespace in assignment"),
    (r'\w\s=\s\s+\w', "gratuitous whitespace after ="),
    ((
        # a line ending with a colon, potentially with trailing comments
        r':([ \t]*#[^\n]*)?\n'
        # one that is not a pass and not only a comment
        r'(?P<indent>[ \t]+)[^#][^\n]+\n'
        # more lines at the same indent level
        r'((?P=indent)[^\n]+\n)*'
        # a pass at the same indent level, which is bogus
        r'(?P=indent)pass[ \t\n#]'
      ), 'omit superfluous pass'),
    (r'.{81}', "line too long"),
    (r'[^\n]\Z', "no trailing newline"),
    (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
#    (r'^\s+[^_ \n][^_. \n]+_[^_\n]+\s*=',
#     "don't use underbars in identifiers"),
    (r'^\s+(self\.)?[A-Za-z][a-z0-9]+[A-Z]\w* = ',
     "don't use camelcase in identifiers", r'#.*camelcase-required'),
    (r'^\s*(if|while|def|class|except|try)\s[^[\n]*:\s*[^\\n]#\s]+',
     "linebreak after :"),
    (r'class\s[^( \n]+:', "old-style class, use class foo(object)",
     r'#.*old-style'),
    (r'class\s[^( \n]+\(\):',
     "class foo() creates old style object, use class foo(object)",
     r'#.*old-style'),
    (r'\b(%s)\(' % '|'.join(k for k in keyword.kwlist
                            if k not in ('print', 'exec')),
     "Python keyword is not a function"),
    (r',]', "unneeded trailing ',' in list"),
#    (r'class\s[A-Z][^\(]*\((?!Exception)',
#     "don't capitalize non-exception classes"),
#    (r'in range\(', "use xrange"),
#    (r'^\s*print\s+', "avoid using print in core and extensions"),
    (r'[\x80-\xff]', "non-ASCII character literal"),
    # NOTE(review): ("\') matches the two-character sequence "' rather than
    # either quote character; ["\'] may have been intended -- confirm
    # before changing, since that would activate the rule.
    (r'("\')\.format\(', "str.format() has no bytes counterpart, use %"),
    (r'^\s*(%s)\s\s' % '|'.join(keyword.kwlist),
     "gratuitous whitespace after Python keyword"),
    (r'([\(\[][ \t]\S)|(\S[ \t][\)\]])', "gratuitous whitespace in () or []"),
#    (r'\s\s=', "gratuitous whitespace before ="),
    (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\S',
     "missing whitespace around operator"),
    (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\s',
     "missing whitespace around operator"),
    (r'\s(\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\S',
     "missing whitespace around operator"),
    (r'[^^+=*/!<>&| %-](\s=|=\s)[^= ]',
     "wrong whitespace around ="),
    (r'\([^()]*( =[^=]|[^<>!=]= )',
     "no whitespace around = for named parameters"),
    (r'raise Exception', "don't raise generic exceptions"),
    (r'raise [^,(]+, (\([^\)]+\)|[^,\(\)]+)$',
     "don't use old-style two-argument raise, use Exception(message)"),
    (r' is\s+(not\s+)?["\'0-9-]', "object comparison with literal"),
    (r' [=!]=\s+(True|False|None)',
     "comparison with singleton, use 'is' or 'is not' instead"),
    (r'^\s*(while|if) [01]:',
     "use True/False for constant Boolean expression"),
    (r'^\s*if False(:| +and)', 'Remove code instead of using `if False`'),
    (r'(?:(?<!def)\s+|\()hasattr\(',
     'hasattr(foo, bar) is broken on py2, use util.safehasattr(foo, bar) '
     'instead', r'#.*hasattr-py3-only'),
    (r'opener\([^)]*\).read\(',
     "use opener.read() instead"),
    (r'opener\([^)]*\).write\(',
     "use opener.write() instead"),
    (r'[\s\(](open|file)\([^)]*\)\.read\(',
     "use util.readfile() instead"),
    (r'[\s\(](open|file)\([^)]*\)\.write\(',
     "use util.writefile() instead"),
    (r'^[\s\(]*(open(er)?|file)\([^)]*\)(?!\.close\(\))',
     "always assign an opened file to a variable, and close it afterwards"),
    (r'[\s\(](open|file)\([^)]*\)\.(?!close\(\))',
     "always assign an opened file to a variable, and close it afterwards"),
    (r'(?i)descend[e]nt', "the proper spelling is descendAnt"),
    (r'\.debug\(\_', "don't mark debug messages for translation"),
    (r'\.strip\(\)\.split\(\)', "no need to strip before splitting"),
    (r'^\s*except\s*:', "naked except clause", r'#.*re-raises'),
    (r'^\s*except\s([^\(,]+|\([^\)]+\))\s*,',
     'legacy exception syntax; use "as" instead of ","'),
    (r':\n( )*( ){1,3}[^ ]', "must indent 4 spaces"),
    (r'release\(.*wlock, .*lock\)', "wrong lock release order"),
    (r'\bdef\s+__bool__\b', "__bool__ should be __nonzero__ in Python 2"),
    (r'os\.path\.join\(.*, *(""|\'\')\)',
     "use pathutil.normasprefix(path) instead of os.path.join(path, '')"),
    (r'\s0[0-7]+\b', 'legacy octal syntax; use "0o" prefix instead of "0"'),
    # XXX only catch mutable arguments on the first line of the definition
    (r'def.*[( ]\w+=\{\}', "don't use mutable default arguments"),
    (r'\butil\.Abort\b', "directly use error.Abort"),
    (r'^@(\w*\.)?cachefunc', "module-level @cachefunc is risky, please avoid"),
    (r'^import atexit', "don't use atexit, use ui.atexit"),
    (r'^import Queue', "don't use Queue, use util.queue + util.empty"),
    (r'^import cStringIO', "don't use cStringIO.StringIO, use util.stringio"),
    (r'^import urllib', "don't use urllib, use util.urlreq/util.urlerr"),
    (r'^import SocketServer', "don't use SocketServer, use util.socketserver"),
    (r'^import urlparse', "don't use urlparse, use util.urlreq"),
    (r'^import xmlrpclib', "don't use xmlrpclib, use util.xmlrpclib"),
    (r'^import cPickle', "don't use cPickle, use util.pickle"),
    (r'^import pickle', "don't use pickle, use util.pickle"),
    (r'^import httplib', "don't use httplib, use util.httplib"),
    (r'^import BaseHTTPServer', "use util.httpserver instead"),
    (r'^(from|import) mercurial\.(cext|pure|cffi)',
     "use mercurial.policy.importmod instead"),
    (r'\.next\(\)', "don't use .next(), use next(...)"),
    (r'([a-z]*).revision\(\1\.node\(',
     "don't convert rev to node before passing to revision(nodeorrev)"),
    (r'platform\.system\(\)', "don't use platform.system(), use pycompat"),

    # rules depending on implementation of repquote()
    (r' x+[xpqo%APM][\'"]\n\s+[\'"]x',
     'string join across lines with no space'),
    # the continuation lines below are part of the (verbose-mode) literal
    (r'''(?x)ui\.(status|progress|write|note|warn)\(
[ \t\n#]*
(?# any strings/comments might precede a string, which
# contains translatable message)
((['"]|\'\'\'|""")[ \npq%bAPMxno]*(['"]|\'\'\'|""")[ \t\n#]+)*
(?# sequence consisting of below might precede translatable message
# - formatting string: "% 10s", "%05d", "% -3.2f", "%*s", "%%" ...
# - escaped character: "\\", "\n", "\0" ...
# - character other than '%', 'b' as '\', and 'x' as alphabet)
(['"]|\'\'\'|""")
((%([ n]?[PM]?([np]+|A))?x)|%%|b[bnx]|[ \nnpqAPMo])*x
(?# this regexp can't use [^...] style,
# because _preparepats forcibly adds "\n" into [^...],
# even though this regexp wants match it against "\n")''',
     "missing _() in ui message (use () to hide false-positives)"),
  ],
  # warnings
  [
    # rules depending on implementation of repquote()
    (r'(^| )pp +xxxxqq[ \n][^\n]', "add two newlines after '.. note::'"),
  ]
]
385 385
# Filter for Python sources: one verbose regex matching either a comment
# (named group 'comment') or a string literal (named groups 'quote' and
# 'text'); each match is rewritten by reppython(). The continuation lines
# are part of the pattern literal.
pyfilters = [
    (r"""(?msx)(?P<comment>\#.*?$)|
((?P<quote>('''|\"\"\"|(?<!')'(?!')|(?<!")"(?!")))
(?P<text>(([^\\]|\\.)*?))
(?P=quote))""", reppython),
]
392 392
# non-filter patterns
# (registered in ``checks`` with an empty filter list, so they are matched
# against the unmodified source, string contents included)
pynfpats = [
  [
    (r'pycompat\.osname\s*[=!]=\s*[\'"]nt[\'"]', "use pycompat.iswindows"),
    (r'pycompat\.osname\s*[=!]=\s*[\'"]posix[\'"]', "use pycompat.isposix"),
    (r'pycompat\.sysplatform\s*[!=]=\s*[\'"]darwin[\'"]',
     "use pycompat.isdarwin"),
  ],
  # warnings
  [],
]

# extension non-filter patterns
# (registered in ``checks`` for paths matching r'.*hgext.*\.py$')
pyextnfpats = [
  [(r'^"""\n?[A-Z]', "don't capitalize docstring title")],
  # warnings
  [],
]
411 411
# No filtering is applied to text files before the rules run.
txtfilters = []

# Rules for *.txt files: [errors, warnings], each rule (regex, message).
txtpats = [
  [
    # raw string: '\s' in a plain literal is an invalid escape sequence
    (r'\s$', 'trailing whitespace'),
    # deliberately not raw: the pattern contains real newline characters
    ('.. note::[ \n][^\n]', 'add two newlines after note::')
  ],
  []
]
421 421
# Rules for C sources and headers: [errors, warnings], each rule being
# (regex, message). They are matched against text rewritten by cfilters.
cpats = [
  [
    (r'//', "don't use //-style comments"),
    (r'\S\t', "don't use tabs except for indent"),
    (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
    (r'.{81}', "line too long"),
    (r'(while|if|do|for)\(', "use space after while/if/do/for"),
    (r'return\(', "return is not a function"),
    (r' ;', "no space before ;"),
    (r'[^;] \)', "no space before )"),
    (r'[)][{]', "space between ) and {"),
    (r'\w+\* \w+', "use int *foo, not int* foo"),
    (r'\W\([^\)]+\) \w+', "use (int)foo, not (int) foo"),
    (r'\w+ (\+\+|--)', "use foo++, not foo ++"),
    (r'\w,\w', "missing whitespace after ,"),
    (r'^[^#]\w[+/*]\w', "missing whitespace in expression"),
    (r'\w\s=\s\s+\w', "gratuitous whitespace after ="),
    (r'^#\s+\w', "use #foo, not # foo"),
    (r'[^\n]\Z', "no trailing newline"),
    (r'^\s*#import\b', "use only #include in standard C code"),
    (r'strcpy\(', "don't use strcpy, use strlcpy or memcpy"),
    (r'strcat\(', "don't use strcat"),

    # rules depending on implementation of repquote()
  ],
  # warnings
  [
    # rules depending on implementation of repquote()
  ]
]
452 452
# Filters for C sources: (regex, replacement function) pairs that
# checkfile() applies in this order with re.sub() before the rules run.
cfilters = [
    # /* ... */ comment bodies are masked
    (r'(/\*)(((\*(?!/))|[^*])*)\*/', repccomment),
    # double-quoted string contents are masked
    (r'''(?P<quote>(?<!")")(?P<text>([^"]|\\")+)"(?!")''', repquote),
    # the header name of "#include <...>" is replaced by a placeholder
    (r'''(#\s*include\s+<)([^>]+)>''', repinclude),
    # indentation of continuation lines inside parentheses is dropped
    (r'(\()([^)]+\))', repcallspaces),
]
459 459
# Layering rule registered in ``checks`` for mercurial/util.py:
# [errors, warnings], each rule (regex, message).
inutilpats = [
  [
    (r'\bui\.', "don't use ui in util"),
  ],
  # warnings
  []
]

# Layering rule registered in ``checks`` for mercurial/revlog.py:
# [errors, warnings], each rule (regex, message).
inrevlogpats = [
  [
    (r'\brepo\.', "don't use repo in revlog"),
  ],
  # warnings
  []
]
475 475
# No filtering is applied to web templates before the rules run.
webtemplatefilters = []

# Rules for web templates: no errors (first list), one warning (second).
webtemplatepats = [
  [],
  [
    (r'{desc(\|(?!websub|firstline)[^\|]*)+}',
     'follow desc keyword with either firstline or websub'),
  ]
]
485 485
# No filtering is applied before the all-files rules run.
allfilesfilters = []

# Rules registered in ``checks`` for every file name not ending in ".po":
# [errors, warnings], each rule (regex, message).
allfilespats = [
  [
    (r'(http|https)://[a-zA-Z0-9./]*selenic.com/',
     'use mercurial-scm.org domain URL'),
    (r'mercurial@selenic\.com',
     'use mercurial-scm.org domain for mercurial ML address'),
    (r'mercurial-devel@selenic\.com',
     'use mercurial-scm.org domain for mercurial-devel ML address'),
  ],
  # warnings
  [],
]
500 500
# Python 3 portability rules: [errors, warnings], each rule being
# (regex, message[, ignore-regex]). Registered in ``checks`` under the
# name 'python 3'.
py3pats = [
  [
    (r'os\.environ', "use encoding.environ instead (py3)", r'#.*re-exports'),
    (r'os\.name', "use pycompat.osname instead (py3)"),
    (r'os\.getcwd', "use pycompat.getcwd instead (py3)"),
    (r'os\.sep', "use pycompat.ossep instead (py3)"),
    (r'os\.pathsep', "use pycompat.ospathsep instead (py3)"),
    (r'os\.altsep', "use pycompat.osaltsep instead (py3)"),
    (r'sys\.platform', "use pycompat.sysplatform instead (py3)"),
    (r'getopt\.getopt', "use pycompat.getoptb instead (py3)"),
    (r'os\.getenv', "use encoding.environ.get instead"),
    (r'os\.setenv', "modifying the environ dict is not preferred"),
  ],
  # warnings
  [],
]
517 517
# Registry of all checks. Each entry is
#   (name, filename regex, magic regex, filters, pats)
# checkfile() runs an entry when the filename regex matches the path or,
# if the magic regex is non-empty, when it is found in the file content.
# ``filters`` and ``pats`` are compiled in place by _preparepats().
checks = [
    ('python', r'.*\.(py|cgi)$', r'^#!.*python', pyfilters, pypats),
    ('python', r'.*\.(py|cgi)$', r'^#!.*python', [], pynfpats),
    ('python', r'.*hgext.*\.py$', '', [], pyextnfpats),
    ('python 3', r'.*(hgext|mercurial)/(?!demandimport|policy|pycompat).*\.py',
     '', pyfilters, py3pats),
    ('test script', r'(.*/)?test-[^.~]*$', '', testfilters, testpats),
    ('c', r'.*\.[ch]$', '', cfilters, cpats),
    ('unified test', r'.*\.t$', '', utestfilters, utestpats),
    ('layering violation repo in revlog', r'mercurial/revlog\.py', '',
     pyfilters, inrevlogpats),
    ('layering violation ui in util', r'mercurial/util\.py', '', pyfilters,
     inutilpats),
    ('txt', r'.*\.txt$', '', txtfilters, txtpats),
    ('web template', r'mercurial/templates/.*\.tmpl', '',
     webtemplatefilters, webtemplatepats),
    ('all except for .po', r'.*(?<!\.po)$', '',
     allfilesfilters, allfilespats),
]
537 537
538 538 def _preparepats():
539 539 for c in checks:
540 540 failandwarn = c[-1]
541 541 for pats in failandwarn:
542 542 for i, pseq in enumerate(pats):
543 543 # fix-up regexes for multi-line searches
544 544 p = pseq[0]
545 # \s doesn't match \n
546 p = re.sub(r'(?<!\\)\\s', r'[ \\t]', p)
545 # \s doesn't match \n (done in two steps)
546 # first, we replace \s that appears in a set already
547 p = re.sub(r'\[\\s', r'[ \\t', p)
548 # now we replace other \s instances.
549 p = re.sub(r'(?<!(\\|\[))\\s', r'[ \\t]', p)
547 550 # [^...] doesn't match newline
548 551 p = re.sub(r'(?<!\\)\[\^', r'[^\\n', p)
549 552
550 553 pats[i] = (re.compile(p, re.MULTILINE),) + pseq[1:]
551 554 filters = c[3]
552 555 for i, flt in enumerate(filters):
553 556 filters[i] = re.compile(flt[0]), flt[1]
554 557
555 558 class norepeatlogger(object):
556 559 def __init__(self):
557 560 self._lastseen = None
558 561
559 562 def log(self, fname, lineno, line, msg, blame):
560 563 """print error related a to given line of a given file.
561 564
562 565 The faulty line will also be printed but only once in the case
563 566 of multiple errors.
564 567
565 568 :fname: filename
566 569 :lineno: line number
567 570 :line: actual content of the line
568 571 :msg: error message
569 572 """
570 573 msgid = fname, lineno, line
571 574 if msgid != self._lastseen:
572 575 if blame:
573 576 print("%s:%d (%s):" % (fname, lineno, blame))
574 577 else:
575 578 print("%s:%d:" % (fname, lineno))
576 579 print(" > %s" % line)
577 580 self._lastseen = msgid
578 581 print(" " + msg)
579 582
580 583 _defaultlogger = norepeatlogger()
581 584
def getblame(f):
    """Return annotate data for ``f`` as a list of (line, user, rev).

    Runs ``hg annotate -un`` on the file and returns one tuple per output
    line, in output order. The pipe is closed before returning, also when
    parsing a line fails.
    """
    lines = []
    # NOTE(review): f is interpolated into a shell command unquoted, so a
    # path containing spaces or shell metacharacters will not work here.
    pipe = os.popen('hg annotate -un %s' % f)
    try:
        for l in pipe:
            start, line = l.split(':', 1)
            user, rev = start.split()
            # drop the first and last character of the content part
            # (presumably the space after ':' and the trailing newline)
            lines.append((line[1:-1], user, rev))
    finally:
        pipe.close()
    return lines
589 592
def checkfile(f, logfunc=_defaultlogger.log, maxerr=None, warnings=False,
              blame=False, debug=False, lineno=True):
    """checks style and portability of a given file

    :f: filepath
    :logfunc: function used to report error
              logfunc(filename, linenumber, linecontent, errormessage,
              blame)
    :maxerr: number of error to display before aborting.
             Set to a false value (default) to report all errors
    :warnings: also evaluate the warning-level rules
    :blame: look up who last changed each faulty line with getblame()
    :debug: print which checks are run or skipped
    :lineno: report line numbers; when false, False is passed to logfunc
             in place of the line number

    return True if no error is found, False otherwise. A file that cannot
    be read or decoded is reported and counts as error-free. The string
    "Skip" is returned for a file carrying the skip marker.
    """
    blamecache = None
    result = True

    try:
        with opentext(f) as fp:
            try:
                pre = post = fp.read()
            except UnicodeDecodeError as e:
                print("%s while reading %s" % (e, f))
                return result
    except IOError as e:
        print("Skipping %s, %s" % (f, str(e).split(':', 1)[0]))
        return result

    for name, match, magic, filters, pats in checks:
        post = pre # discard filtering result of previous check
        if debug:
            print(name, f)
        fc = 0
        if not (re.match(match, f) or (magic and re.search(magic, pre))):
            if debug:
                # report check name, then file, then the unmatched pattern
                print("Skipping %s for %s it doesn't match %s" % (
                    name, f, match))
            continue
        if "no-" "check-code" in pre:
            # If you're looking at this line, it's because a file has:
            # no- check- code
            # but the reason to output skipping is to make life for
            # tests easier. So, instead of writing it with a normal
            # spelling, we write it with the expected spelling from
            # tests/test-check-code.t
            print("Skipping %s it has no-che?k-code (glob)" % f)
            return "Skip" # skip checking this file
        for p, r in filters:
            post = re.sub(p, r, post)
        nerrs = len(pats[0]) # nerr elements are errors
        if warnings:
            pats = pats[0] + pats[1]
        else:
            pats = pats[0]
        # print post # uncomment to show filtered version

        if debug:
            print("Checking %s for %s" % (name, f))

        prelines = None
        errors = []
        for i, pat in enumerate(pats):
            if len(pat) == 3:
                p, msg, ignore = pat
            else:
                p, msg = pat
                ignore = None
            if i >= nerrs:
                msg = "warning: " + msg

            # pos/n track the offset and index of the line holding the
            # current match; they only move forward within one rule
            pos = 0
            n = 0
            for m in p.finditer(post):
                if prelines is None:
                    # split lazily, only once a rule actually matches
                    prelines = pre.splitlines()
                    postlines = post.splitlines(True)

                start = m.start()
                while n < len(postlines):
                    step = len(postlines[n])
                    if pos + step > start:
                        break
                    pos += step
                    n += 1
                # report the unfiltered text of the matching line
                l = prelines[n]

                if ignore and re.search(ignore, l, re.MULTILINE):
                    if debug:
                        print("Skipping %s for %s:%s (ignore pattern)" % (
                            name, f, n))
                    continue
                bd = ""
                if blame:
                    bd = 'working directory'
                    if not blamecache:
                        blamecache = getblame(f)
                    if n < len(blamecache):
                        bl, bu, br = blamecache[n]
                        # only credit the annotation if the line is unchanged
                        if bl == l:
                            bd = '%s@%s' % (bu, br)

                errors.append((f, lineno and n + 1, l, msg, bd))
                result = False

        errors.sort()
        for e in errors:
            logfunc(*e)
            fc += 1
            if maxerr and fc >= maxerr:
                print(" (too many errors, giving up)")
                break

    return result
701 704
def main():
    """Command-line entry point.

    Parses the options, takes the files to check from the arguments, from
    stdin when the only argument is "-", or from glob("*") when there are
    none, and runs checkfile() on each of them.

    return 0 when every file passed, 1 otherwise.
    """
    parser = optparse.OptionParser("%prog [options] [files | -]")
    parser.add_option("-w", "--warnings", action="store_true",
                      help="include warning-level checks")
    parser.add_option("-p", "--per-file", type="int",
                      help="max warnings per file")
    parser.add_option("-b", "--blame", action="store_true",
                      help="use annotate to generate blame info")
    parser.add_option("", "--debug", action="store_true",
                      help="show debug information")
    parser.add_option("", "--nolineno", action="store_false",
                      dest='lineno', help="don't show line numbers")

    parser.set_defaults(per_file=15, warnings=False, blame=False, debug=False,
                        lineno=True)
    options, args = parser.parse_args()

    if not args:
        check = glob.glob("*")
    elif args == ['-']:
        # read file list from stdin
        check = sys.stdin.read().splitlines()
    else:
        check = args

    _preparepats()

    # a list, not a generator: every file is checked even after a failure
    outcomes = [checkfile(f, maxerr=options.per_file,
                          warnings=options.warnings, blame=options.blame,
                          debug=options.debug, lineno=options.lineno)
                for f in check]
    return 0 if all(outcomes) else 1

if __name__ == "__main__":
    sys.exit(main())
General Comments 0
You need to be logged in to leave comments. Login now