##// END OF EJS Templates
contrib: split pypats list in check-code.py...
FUJIWARA Katsunori -
r41987:14e8d042 default
parent child Browse files
Show More
@@ -1,744 +1,766 b''
1 1 #!/usr/bin/env python
2 2 #
3 3 # check-code - a style and portability checker for Mercurial
4 4 #
5 5 # Copyright 2010 Matt Mackall <mpm@selenic.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """style and portability checker for Mercurial
11 11
12 12 when a rule triggers wrong, do one of the following (prefer one from top):
13 13 * do the work-around the rule suggests
14 14 * doublecheck that it is a false match
15 15 * improve the rule pattern
16 16 * add an ignore pattern to the rule (3rd arg) which matches your good line
17 17 (you can append a short comment and match this, like: #re-raises)
18 18 * change the pattern to a warning and list the exception in test-check-code-hg
19 19 * ONLY use no--check-code for skipping entire files from external sources
20 20 """
21 21
22 22 from __future__ import absolute_import, print_function
23 23 import glob
24 24 import keyword
25 25 import optparse
26 26 import os
27 27 import re
28 28 import sys
# Text files are opened through opentext(); on Python 3 an explicit
# latin1 encoding is requested, on Python 2 the builtin open is used as-is.
if sys.version_info[0] >= 3:
    def opentext(f):
        return open(f, encoding='latin1')
else:
    opentext = open

# Provide xrange on interpreters that no longer define it.
try:
    xrange
except NameError:
    xrange = range
38 38 try:
39 39 import re2
40 40 except ImportError:
41 41 re2 = None
42 42
def compilere(pat, multiline=False):
    """Compile ``pat`` with re2 when possible, otherwise with ``re``.

    When ``multiline`` is true the inline ``(?m)`` flag is prepended to
    the pattern before compiling. If the re2 module is available but
    raises ``re2.error`` for the pattern, the standard ``re`` module is
    used instead.
    """
    source = '(?m)' + pat if multiline else pat
    if not re2:
        return re.compile(source)
    try:
        compiled = re2.compile(source)
    except re2.error:
        # re2 rejected the pattern; fall back to the stdlib engine
        compiled = re.compile(source)
    return compiled
52 52
53 53 # check "rules depending on implementation of repquote()" in each
54 54 # patterns (especially pypats), before changing around repquote()
55 55 _repquotefixedmap = {' ': ' ', '\n': '\n', '.': 'p', ':': 'q',
56 56 '%': '%', '\\': 'b', '*': 'A', '+': 'P', '-': 'M'}
57 57 def _repquoteencodechr(i):
58 58 if i > 255:
59 59 return 'u'
60 60 c = chr(i)
61 61 if c in _repquotefixedmap:
62 62 return _repquotefixedmap[c]
63 63 if c.isalpha():
64 64 return 'x'
65 65 if c.isdigit():
66 66 return 'n'
67 67 return 'o'
# translation table: position i holds the stand-in for character code i
_repquotett = ''.join([_repquoteencodechr(code) for code in xrange(256)])

def repquote(m):
    """Mask the contents of a quoted string match.

    ``m`` must provide the named groups 'quote' and 'text'. The text is
    translated through _repquotett and returned wrapped in the same
    quote characters that delimited it.
    """
    quote = m.group('quote')
    masked = m.group('text').translate(_repquotett)
    return quote + masked + quote
74 74
def reppython(m):
    """Mask a Python comment or quoted string match.

    When the 'comment' group matched, every character up to the end of
    the right-stripped comment becomes '#', and trailing whitespace is
    kept unchanged. Otherwise the match is handed to repquote().
    """
    comment = m.group('comment')
    if not comment:
        return repquote(m)
    width = len(comment.rstrip())
    return "#" * width + comment[width:]
81 81
def repcomment(m):
    """Return group 1 followed by one '#' per character of group 2."""
    lead, comment = m.group(1), m.group(2)
    return lead + "#" * len(comment)
84 84
def repccomment(m):
    """Mask the body of a C comment match.

    In group 2, every non-whitespace character, and a space that directly
    follows a newline, is replaced by 'x'. The result is group 1, the
    masked body, and a closing "*/".
    """
    body = m.group(2)
    masked = re.sub(r"((?<=\n) )|\S", "x", body)
    return "".join((m.group(1), masked, "*/"))
88 88
def repcallspaces(m):
    """Return group 1 plus group 2 with post-newline whitespace removed.

    Each newline followed by a run of whitespace in group 2 is reduced
    to the bare newline.
    """
    prefix = m.group(1)
    squeezed = re.sub(r"\n\s+", "\n", m.group(2))
    return prefix + squeezed
92 92
def repinclude(m):
    """Return group 1 of the match followed by the literal "<foo>".

    Whatever group 1 captured is kept verbatim; only the placeholder is
    appended.
    """
    return "".join((m.group(1), "<foo>"))
95 95
def rephere(m):
    """Return group 1 followed by group 2 with non-whitespace masked.

    Every non-whitespace character of group 2 becomes 'x'; whitespace,
    including newlines, is left as it is.
    """
    masked = re.sub(r"\S", "x", m.group(2))
    return "".join((m.group(1), masked))
99 99
100 100
101 101 testpats = [
102 102 [
103 103 (r'\b(push|pop)d\b', "don't use 'pushd' or 'popd', use 'cd'"),
104 104 (r'\W\$?\(\([^\)\n]*\)\)', "don't use (()) or $(()), use 'expr'"),
105 105 (r'grep.*-q', "don't use 'grep -q', redirect to /dev/null"),
106 106 (r'(?<!hg )grep.* -a', "don't use 'grep -a', use in-line python"),
107 107 (r'sed.*-i', "don't use 'sed -i', use a temporary file"),
108 108 (r'\becho\b.*\\n', "don't use 'echo \\n', use printf"),
109 109 (r'echo -n', "don't use 'echo -n', use printf"),
110 110 (r'(^|\|\s*)\bwc\b[^|]*$\n(?!.*\(re\))', "filter wc output"),
111 111 (r'head -c', "don't use 'head -c', use 'dd'"),
112 112 (r'tail -n', "don't use the '-n' option to tail, just use '-<num>'"),
113 113 (r'sha1sum', "don't use sha1sum, use $TESTDIR/md5sum.py"),
114 114 (r'\bls\b.*-\w*R', "don't use 'ls -R', use 'find'"),
115 115 (r'printf.*[^\\]\\([1-9]|0\d)', r"don't use 'printf \NNN', use Python"),
116 116 (r'printf.*[^\\]\\x', "don't use printf \\x, use Python"),
117 117 (r'\$\(.*\)', "don't use $(expr), use `expr`"),
118 118 (r'rm -rf \*', "don't use naked rm -rf, target a directory"),
119 119 (r'\[[^\]]+==', '[ foo == bar ] is a bashism, use [ foo = bar ] instead'),
120 120 (r'(^|\|\s*)grep (-\w\s+)*[^|]*[(|]\w',
121 121 "use egrep for extended grep syntax"),
122 122 (r'(^|\|\s*)e?grep .*\\S', "don't use \\S in regular expression"),
123 123 (r'(?<!!)/bin/', "don't use explicit paths for tools"),
124 124 (r'#!.*/bash', "don't use bash in shebang, use sh"),
125 125 (r'[^\n]\Z', "no trailing newline"),
126 126 (r'export .*=', "don't export and assign at once"),
127 127 (r'^source\b', "don't use 'source', use '.'"),
128 128 (r'touch -d', "don't use 'touch -d', use 'touch -t' instead"),
129 129 (r'\bls +[^|\n-]+ +-', "options to 'ls' must come before filenames"),
130 130 (r'[^>\n]>\s*\$HGRCPATH', "don't overwrite $HGRCPATH, append to it"),
131 131 (r'^stop\(\)', "don't use 'stop' as a shell function name"),
132 132 (r'(\[|\btest\b).*-e ', "don't use 'test -e', use 'test -f'"),
133 133 (r'\[\[\s+[^\]]*\]\]', "don't use '[[ ]]', use '[ ]'"),
134 134 (r'^alias\b.*=', "don't use alias, use a function"),
135 135 (r'if\s*!', "don't use '!' to negate exit status"),
136 136 (r'/dev/u?random', "don't use entropy, use /dev/zero"),
137 137 (r'do\s*true;\s*done', "don't use true as loop body, use sleep 0"),
138 138 (r'sed (-e )?\'(\d+|/[^/]*/)i(?!\\\n)',
139 139 "put a backslash-escaped newline after sed 'i' command"),
140 140 (r'^diff *-\w*[uU].*$\n(^ \$ |^$)', "prefix diff -u/-U with cmp"),
141 141 (r'^\s+(if)? diff *-\w*[uU]', "prefix diff -u/-U with cmp"),
142 142 (r'[\s="`\']python\s(?!bindings)', "don't use 'python', use '$PYTHON'"),
143 143 (r'seq ', "don't use 'seq', use $TESTDIR/seq.py"),
144 144 (r'\butil\.Abort\b', "directly use error.Abort"),
145 145 (r'\|&', "don't use |&, use 2>&1"),
146 146 (r'\w = +\w', "only one space after = allowed"),
147 147 (r'\bsed\b.*[^\\]\\n', "don't use 'sed ... \\n', use a \\ and a newline"),
148 148 (r'env.*-u', "don't use 'env -u VAR', use 'unset VAR'"),
149 149 (r'cp.* -r ', "don't use 'cp -r', use 'cp -R'"),
150 150 (r'grep.* -[ABC]', "don't use grep's context flags"),
151 151 (r'find.*-printf',
152 152 "don't use 'find -printf', it doesn't exist on BSD find(1)"),
153 153 (r'\$RANDOM ', "don't use bash-only $RANDOM to generate random values"),
154 154 ],
155 155 # warnings
156 156 [
157 157 (r'^function', "don't use 'function', use old style"),
158 158 (r'^diff.*-\w*N', "don't use 'diff -N'"),
159 159 (r'\$PWD|\${PWD}', "don't use $PWD, use `pwd`"),
160 160 (r'^([^"\'\n]|("[^"\n]*")|(\'[^\'\n]*\'))*\^', "^ must be quoted"),
161 161 (r'kill (`|\$\()', "don't use kill, use killdaemons.py")
162 162 ]
163 163 ]
164 164
165 165 testfilters = [
166 166 (r"( *)(#([^!][^\n]*\S)?)", repcomment),
167 167 (r"<<(\S+)((.|\n)*?\n\1)", rephere),
168 168 ]
169 169
170 170 uprefix = r"^ \$ "
171 171 utestpats = [
172 172 [
173 173 (r'^(\S.*|| [$>] \S.*)[ \t]\n', "trailing whitespace on non-output"),
174 174 (uprefix + r'.*\|\s*sed[^|>\n]*\n',
175 175 "use regex test output patterns instead of sed"),
176 176 (uprefix + r'(true|exit 0)', "explicit zero exit unnecessary"),
177 177 (uprefix + r'.*(?<!\[)\$\?', "explicit exit code checks unnecessary"),
178 178 (uprefix + r'.*\|\| echo.*(fail|error)',
179 179 "explicit exit code checks unnecessary"),
180 180 (uprefix + r'set -e', "don't use set -e"),
181 181 (uprefix + r'(\s|fi\b|done\b)', "use > for continued lines"),
182 182 (uprefix + r'.*:\.\S*/', "x:.y in a path does not work on msys, rewrite "
183 183 "as x://.y, or see `hg log -k msys` for alternatives", r'-\S+:\.|' #-Rxxx
184 184 '# no-msys'), # in test-pull.t which is skipped on windows
185 185 (r'^ [^$>].*27\.0\.0\.1',
186 186 'use $LOCALIP not an explicit loopback address'),
187 187 (r'^ (?![>$] ).*\$LOCALIP.*[^)]$',
188 188 'mark $LOCALIP output lines with (glob) to help tests in BSD jails'),
189 189 (r'^ (cat|find): .*: \$ENOENT\$',
190 190 'use test -f to test for file existence'),
191 191 (r'^ diff -[^ -]*p',
192 192 "don't use (external) diff with -p for portability"),
193 193 (r' readlink ', 'use readlink.py instead of readlink'),
194 194 (r'^ [-+][-+][-+] .* [-+]0000 \(glob\)',
195 195 "glob timezone field in diff output for portability"),
196 196 (r'^ @@ -[0-9]+ [+][0-9]+,[0-9]+ @@',
197 197 "use '@@ -N* +N,n @@ (glob)' style chunk header for portability"),
198 198 (r'^ @@ -[0-9]+,[0-9]+ [+][0-9]+ @@',
199 199 "use '@@ -N,n +N* @@ (glob)' style chunk header for portability"),
200 200 (r'^ @@ -[0-9]+ [+][0-9]+ @@',
201 201 "use '@@ -N* +N* @@ (glob)' style chunk header for portability"),
202 202 (uprefix + r'hg( +-[^ ]+( +[^ ]+)?)* +extdiff'
203 203 r'( +(-[^ po-]+|--(?!program|option)[^ ]+|[^-][^ ]*))*$',
204 204 "use $RUNTESTDIR/pdiff via extdiff (or -o/-p for false-positives)"),
205 205 ],
206 206 # warnings
207 207 [
208 208 (r'^ (?!.*\$LOCALIP)[^*?/\n]* \(glob\)$',
209 209 "glob match with no glob string (?, *, /, and $LOCALIP)"),
210 210 ]
211 211 ]
212 212
213 213 # transform plain test rules to unified test's
214 214 for i in [0, 1]:
215 215 for tp in testpats[i]:
216 216 p = tp[0]
217 217 m = tp[1]
218 218 if p.startswith(r'^'):
219 219 p = r"^ [$>] (%s)" % p[1:]
220 220 else:
221 221 p = r"^ [$>] .*(%s)" % p
222 222 utestpats[i].append((p, m) + tp[2:])
223 223
224 224 # don't transform the following rules:
225 225 # " > \t" and " \t" should be allowed in unified tests
226 226 testpats[0].append((r'^( *)\t', "don't use tabs to indent"))
227 227 utestpats[0].append((r'^( ?)\t', "don't use tabs to indent"))
228 228
229 229 utestfilters = [
230 230 (r"<<(\S+)((.|\n)*?\n > \1)", rephere),
231 231 (r"( +)(#([^!][^\n]*\S)?)", repcomment),
232 232 ]
233 233
234 pypats = [
234 # common patterns to check *.py
235 commonpypats = [
235 236 [
236 237 (r'\\$', 'Use () to wrap long lines in Python, not \\'),
237 238 (r'^\s*def\s*\w+\s*\(.*,\s*\(',
238 239 "tuple parameter unpacking not available in Python 3+"),
239 240 (r'lambda\s*\(.*,.*\)',
240 241 "tuple parameter unpacking not available in Python 3+"),
241 242 (r'(?<!def)\s+(cmp)\(', "cmp is not available in Python 3+"),
242 243 (r'(?<!\.)\breduce\s*\(.*', "reduce is not available in Python 3+"),
243 244 (r'\bdict\(.*=', 'dict() is different in Py2 and 3 and is slower than {}',
244 245 'dict-from-generator'),
245 246 (r'\.has_key\b', "dict.has_key is not available in Python 3+"),
246 247 (r'\s<>\s', '<> operator is not available in Python 3+, use !='),
247 248 (r'^\s*\t', "don't use tabs"),
248 249 (r'\S;\s*\n', "semicolon"),
249 250 (r'[^_]_\([ \t\n]*(?:"[^"]+"[ \t\n+]*)+%', "don't use % inside _()"),
250 251 (r"[^_]_\([ \t\n]*(?:'[^']+'[ \t\n+]*)+%", "don't use % inside _()"),
251 252 (r'(\w|\)),\w', "missing whitespace after ,"),
252 253 (r'(\w|\))[+/*\-<>]\w', "missing whitespace in expression"),
253 254 (r'^\s+(\w|\.)+=\w[^,()\n]*$', "missing whitespace in assignment"),
254 255 (r'\w\s=\s\s+\w', "gratuitous whitespace after ="),
255 256 ((
256 257 # a line ending with a colon, potentially with trailing comments
257 258 r':([ \t]*#[^\n]*)?\n'
258 259 # one that is not a pass and not only a comment
259 260 r'(?P<indent>[ \t]+)[^#][^\n]+\n'
260 261 # more lines at the same indent level
261 262 r'((?P=indent)[^\n]+\n)*'
262 263 # a pass at the same indent level, which is bogus
263 264 r'(?P=indent)pass[ \t\n#]'
264 265 ), 'omit superfluous pass'),
265 (r'.{81}', "line too long"),
266 266 (r'[^\n]\Z', "no trailing newline"),
267 267 (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
268 268 # (r'^\s+[^_ \n][^_. \n]+_[^_\n]+\s*=',
269 269 # "don't use underbars in identifiers"),
270 270 (r'^\s+(self\.)?[A-Za-z][a-z0-9]+[A-Z]\w* = ',
271 271 "don't use camelcase in identifiers", r'#.*camelcase-required'),
272 272 (r'^\s*(if|while|def|class|except|try)\s[^[\n]*:\s*[^\\n]#\s]+',
273 273 "linebreak after :"),
274 274 (r'class\s[^( \n]+:', "old-style class, use class foo(object)",
275 275 r'#.*old-style'),
276 276 (r'class\s[^( \n]+\(\):',
277 277 "class foo() creates old style object, use class foo(object)",
278 278 r'#.*old-style'),
279 279 (r'\b(%s)\(' % '|'.join(k for k in keyword.kwlist
280 280 if k not in ('print', 'exec')),
281 281 "Python keyword is not a function"),
282 282 (r',]', "unneeded trailing ',' in list"),
283 283 # (r'class\s[A-Z][^\(]*\((?!Exception)',
284 284 # "don't capitalize non-exception classes"),
285 285 # (r'in range\(', "use xrange"),
286 286 # (r'^\s*print\s+', "avoid using print in core and extensions"),
287 287 (r'[\x80-\xff]', "non-ASCII character literal"),
288 288 (r'("\')\.format\(', "str.format() has no bytes counterpart, use %"),
289 289 (r'^\s*(%s)\s\s' % '|'.join(keyword.kwlist),
290 290 "gratuitous whitespace after Python keyword"),
291 291 (r'([\(\[][ \t]\S)|(\S[ \t][\)\]])', "gratuitous whitespace in () or []"),
292 292 # (r'\s\s=', "gratuitous whitespace before ="),
293 293 (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\S',
294 294 "missing whitespace around operator"),
295 295 (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\s',
296 296 "missing whitespace around operator"),
297 297 (r'\s(\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\S',
298 298 "missing whitespace around operator"),
299 299 (r'[^^+=*/!<>&| %-](\s=|=\s)[^= ]',
300 300 "wrong whitespace around ="),
301 301 (r'\([^()]*( =[^=]|[^<>!=]= )',
302 302 "no whitespace around = for named parameters"),
303 (r'raise Exception', "don't raise generic exceptions"),
304 303 (r'raise [^,(]+, (\([^\)]+\)|[^,\(\)]+)$',
305 304 "don't use old-style two-argument raise, use Exception(message)"),
306 305 (r' is\s+(not\s+)?["\'0-9-]', "object comparison with literal"),
307 306 (r' [=!]=\s+(True|False|None)',
308 307 "comparison with singleton, use 'is' or 'is not' instead"),
309 308 (r'^\s*(while|if) [01]:',
310 309 "use True/False for constant Boolean expression"),
311 310 (r'^\s*if False(:| +and)', 'Remove code instead of using `if False`'),
312 311 (r'(?:(?<!def)\s+|\()hasattr\(',
313 312 'hasattr(foo, bar) is broken on py2, use util.safehasattr(foo, bar) '
314 313 'instead', r'#.*hasattr-py3-only'),
315 314 (r'opener\([^)]*\).read\(',
316 315 "use opener.read() instead"),
317 316 (r'opener\([^)]*\).write\(',
318 317 "use opener.write() instead"),
319 (r'[\s\(](open|file)\([^)]*\)\.read\(',
320 "use util.readfile() instead"),
321 (r'[\s\(](open|file)\([^)]*\)\.write\(',
322 "use util.writefile() instead"),
323 (r'^[\s\(]*(open(er)?|file)\([^)]*\)(?!\.close\(\))',
324 "always assign an opened file to a variable, and close it afterwards"),
325 (r'[\s\(](open|file)\([^)]*\)\.(?!close\(\))',
326 "always assign an opened file to a variable, and close it afterwards"),
327 318 (r'(?i)descend[e]nt', "the proper spelling is descendAnt"),
328 319 (r'\.debug\(\_', "don't mark debug messages for translation"),
329 320 (r'\.strip\(\)\.split\(\)', "no need to strip before splitting"),
330 321 (r'^\s*except\s*:', "naked except clause", r'#.*re-raises'),
331 322 (r'^\s*except\s([^\(,]+|\([^\)]+\))\s*,',
332 323 'legacy exception syntax; use "as" instead of ","'),
333 (r':\n( )*( ){1,3}[^ ]', "must indent 4 spaces"),
334 324 (r'release\(.*wlock, .*lock\)', "wrong lock release order"),
335 325 (r'\bdef\s+__bool__\b', "__bool__ should be __nonzero__ in Python 2"),
336 326 (r'os\.path\.join\(.*, *(""|\'\')\)',
337 327 "use pathutil.normasprefix(path) instead of os.path.join(path, '')"),
338 328 (r'\s0[0-7]+\b', 'legacy octal syntax; use "0o" prefix instead of "0"'),
339 329 # XXX only catch mutable arguments on the first line of the definition
340 330 (r'def.*[( ]\w+=\{\}', "don't use mutable default arguments"),
341 331 (r'\butil\.Abort\b', "directly use error.Abort"),
342 332 (r'^@(\w*\.)?cachefunc', "module-level @cachefunc is risky, please avoid"),
343 (r'^import atexit', "don't use atexit, use ui.atexit"),
344 333 (r'^import Queue', "don't use Queue, use pycompat.queue.Queue + "
345 334 "pycompat.queue.Empty"),
346 335 (r'^import cStringIO', "don't use cStringIO.StringIO, use util.stringio"),
347 336 (r'^import urllib', "don't use urllib, use util.urlreq/util.urlerr"),
348 337 (r'^import SocketServer', "don't use SockerServer, use util.socketserver"),
349 338 (r'^import urlparse', "don't use urlparse, use util.urlreq"),
350 339 (r'^import xmlrpclib', "don't use xmlrpclib, use util.xmlrpclib"),
351 340 (r'^import cPickle', "don't use cPickle, use util.pickle"),
352 341 (r'^import pickle', "don't use pickle, use util.pickle"),
353 342 (r'^import httplib', "don't use httplib, use util.httplib"),
354 343 (r'^import BaseHTTPServer', "use util.httpserver instead"),
355 344 (r'^(from|import) mercurial\.(cext|pure|cffi)',
356 345 "use mercurial.policy.importmod instead"),
357 346 (r'\.next\(\)', "don't use .next(), use next(...)"),
358 347 (r'([a-z]*).revision\(\1\.node\(',
359 348 "don't convert rev to node before passing to revision(nodeorrev)"),
360 349 (r'platform\.system\(\)', "don't use platform.system(), use pycompat"),
361 350
351 ],
352 # warnings
353 [
354 ]
355 ]
356
357 # patterns to check normal *.py files
358 pypats = [
359 [
360 # Ideally, these should be placed in "commonpypats" for
361 # consistency of coding rules in Mercurial source tree.
362 # But on the other hand, these are not so seriously required for
363 # python code fragments embedded in test scripts. Fixing test
364 # scripts for these patterns requires many changes, and has less
365 # profit than effort.
366 (r'.{81}', "line too long"),
367 (r'raise Exception', "don't raise generic exceptions"),
368 (r'[\s\(](open|file)\([^)]*\)\.read\(',
369 "use util.readfile() instead"),
370 (r'[\s\(](open|file)\([^)]*\)\.write\(',
371 "use util.writefile() instead"),
372 (r'^[\s\(]*(open(er)?|file)\([^)]*\)(?!\.close\(\))',
373 "always assign an opened file to a variable, and close it afterwards"),
374 (r'[\s\(](open|file)\([^)]*\)\.(?!close\(\))',
375 "always assign an opened file to a variable, and close it afterwards"),
376 (r':\n( )*( ){1,3}[^ ]', "must indent 4 spaces"),
377 (r'^import atexit', "don't use atexit, use ui.atexit"),
378
362 379 # rules depending on implementation of repquote()
363 380 (r' x+[xpqo%APM][\'"]\n\s+[\'"]x',
364 381 'string join across lines with no space'),
365 382 (r'''(?x)ui\.(status|progress|write|note|warn)\(
366 383 [ \t\n#]*
367 384 (?# any strings/comments might precede a string, which
368 385 # contains translatable message)
369 386 ((['"]|\'\'\'|""")[ \npq%bAPMxno]*(['"]|\'\'\'|""")[ \t\n#]+)*
370 387 (?# sequence consisting of below might precede translatable message
371 388 # - formatting string: "% 10s", "%05d", "% -3.2f", "%*s", "%%" ...
372 389 # - escaped character: "\\", "\n", "\0" ...
373 390 # - character other than '%', 'b' as '\', and 'x' as alphabet)
374 391 (['"]|\'\'\'|""")
375 392 ((%([ n]?[PM]?([np]+|A))?x)|%%|b[bnx]|[ \nnpqAPMo])*x
376 393 (?# this regexp can't use [^...] style,
377 394 # because _preparepats forcibly adds "\n" into [^...],
378 395 # even though this regexp wants match it against "\n")''',
379 396 "missing _() in ui message (use () to hide false-positives)"),
380 ],
397 ] + commonpypats[0],
381 398 # warnings
382 399 [
383 400 # rules depending on implementation of repquote()
384 401 (r'(^| )pp +xxxxqq[ \n][^\n]', "add two newlines after '.. note::'"),
385 ]
402 ] + commonpypats[1]
386 403 ]
387 404
388 pyfilters = [
405 # common filters to convert *.py
406 commonpyfilters = [
389 407 (r"""(?msx)(?P<comment>\#.*?$)|
390 408 ((?P<quote>('''|\"\"\"|(?<!')'(?!')|(?<!")"(?!")))
391 409 (?P<text>(([^\\]|\\.)*?))
392 410 (?P=quote))""", reppython),
393 411 ]
394 412
413 # filters to convert normal *.py files
414 pyfilters = [
415 ] + commonpyfilters
416
395 417 # non-filter patterns
396 418 pynfpats = [
397 419 [
398 420 (r'pycompat\.osname\s*[=!]=\s*[\'"]nt[\'"]', "use pycompat.iswindows"),
399 421 (r'pycompat\.osname\s*[=!]=\s*[\'"]posix[\'"]', "use pycompat.isposix"),
400 422 (r'pycompat\.sysplatform\s*[!=]=\s*[\'"]darwin[\'"]',
401 423 "use pycompat.isdarwin"),
402 424 ],
403 425 # warnings
404 426 [],
405 427 ]
406 428
407 429 # extension non-filter patterns
408 430 pyextnfpats = [
409 431 [(r'^"""\n?[A-Z]', "don't capitalize docstring title")],
410 432 # warnings
411 433 [],
412 434 ]
413 435
414 436 txtfilters = []
415 437
416 438 txtpats = [
417 439 [
418 440 (r'\s$', 'trailing whitespace'),
419 441 ('.. note::[ \n][^\n]', 'add two newlines after note::')
420 442 ],
421 443 []
422 444 ]
423 445
424 446 cpats = [
425 447 [
426 448 (r'//', "don't use //-style comments"),
427 449 (r'\S\t', "don't use tabs except for indent"),
428 450 (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
429 451 (r'.{81}', "line too long"),
430 452 (r'(while|if|do|for)\(', "use space after while/if/do/for"),
431 453 (r'return\(', "return is not a function"),
432 454 (r' ;', "no space before ;"),
433 455 (r'[^;] \)', "no space before )"),
434 456 (r'[)][{]', "space between ) and {"),
435 457 (r'\w+\* \w+', "use int *foo, not int* foo"),
436 458 (r'\W\([^\)]+\) \w+', "use (int)foo, not (int) foo"),
437 459 (r'\w+ (\+\+|--)', "use foo++, not foo ++"),
438 460 (r'\w,\w', "missing whitespace after ,"),
439 461 (r'^[^#]\w[+/*]\w', "missing whitespace in expression"),
440 462 (r'\w\s=\s\s+\w', "gratuitous whitespace after ="),
441 463 (r'^#\s+\w', "use #foo, not # foo"),
442 464 (r'[^\n]\Z', "no trailing newline"),
443 465 (r'^\s*#import\b', "use only #include in standard C code"),
444 466 (r'strcpy\(', "don't use strcpy, use strlcpy or memcpy"),
445 467 (r'strcat\(', "don't use strcat"),
446 468
447 469 # rules depending on implementation of repquote()
448 470 ],
449 471 # warnings
450 472 [
451 473 # rules depending on implementation of repquote()
452 474 ]
453 475 ]
454 476
455 477 cfilters = [
456 478 (r'(/\*)(((\*(?!/))|[^*])*)\*/', repccomment),
457 479 (r'''(?P<quote>(?<!")")(?P<text>([^"]|\\")+)"(?!")''', repquote),
458 480 (r'''(#\s*include\s+<)([^>]+)>''', repinclude),
459 481 (r'(\()([^)]+\))', repcallspaces),
460 482 ]
461 483
462 484 inutilpats = [
463 485 [
464 486 (r'\bui\.', "don't use ui in util"),
465 487 ],
466 488 # warnings
467 489 []
468 490 ]
469 491
470 492 inrevlogpats = [
471 493 [
472 494 (r'\brepo\.', "don't use repo in revlog"),
473 495 ],
474 496 # warnings
475 497 []
476 498 ]
477 499
478 500 webtemplatefilters = []
479 501
480 502 webtemplatepats = [
481 503 [],
482 504 [
483 505 (r'{desc(\|(?!websub|firstline)[^\|]*)+}',
484 506 'follow desc keyword with either firstline or websub'),
485 507 ]
486 508 ]
487 509
488 510 allfilesfilters = []
489 511
490 512 allfilespats = [
491 513 [
492 514 (r'(http|https)://[a-zA-Z0-9./]*selenic.com/',
493 515 'use mercurial-scm.org domain URL'),
494 516 (r'mercurial@selenic\.com',
495 517 'use mercurial-scm.org domain for mercurial ML address'),
496 518 (r'mercurial-devel@selenic\.com',
497 519 'use mercurial-scm.org domain for mercurial-devel ML address'),
498 520 ],
499 521 # warnings
500 522 [],
501 523 ]
502 524
503 525 py3pats = [
504 526 [
505 527 (r'os\.environ', "use encoding.environ instead (py3)", r'#.*re-exports'),
506 528 (r'os\.name', "use pycompat.osname instead (py3)"),
507 529 (r'os\.getcwd', "use encoding.getcwd instead (py3)", r'#.*re-exports'),
508 530 (r'os\.sep', "use pycompat.ossep instead (py3)"),
509 531 (r'os\.pathsep', "use pycompat.ospathsep instead (py3)"),
510 532 (r'os\.altsep', "use pycompat.osaltsep instead (py3)"),
511 533 (r'sys\.platform', "use pycompat.sysplatform instead (py3)"),
512 534 (r'getopt\.getopt', "use pycompat.getoptb instead (py3)"),
513 535 (r'os\.getenv', "use encoding.environ.get instead"),
514 536 (r'os\.setenv', "modifying the environ dict is not preferred"),
515 537 (r'(?<!pycompat\.)xrange', "use pycompat.xrange instead (py3)"),
516 538 ],
517 539 # warnings
518 540 [],
519 541 ]
520 542
521 543 checks = [
522 544 ('python', r'.*\.(py|cgi)$', r'^#!.*python', pyfilters, pypats),
523 545 ('python', r'.*\.(py|cgi)$', r'^#!.*python', [], pynfpats),
524 546 ('python', r'.*hgext.*\.py$', '', [], pyextnfpats),
525 547 ('python 3', r'.*(hgext|mercurial)/(?!demandimport|policy|pycompat).*\.py',
526 548 '', pyfilters, py3pats),
527 549 ('test script', r'(.*/)?test-[^.~]*$', '', testfilters, testpats),
528 550 ('c', r'.*\.[ch]$', '', cfilters, cpats),
529 551 ('unified test', r'.*\.t$', '', utestfilters, utestpats),
530 552 ('layering violation repo in revlog', r'mercurial/revlog\.py', '',
531 553 pyfilters, inrevlogpats),
532 554 ('layering violation ui in util', r'mercurial/util\.py', '', pyfilters,
533 555 inutilpats),
534 556 ('txt', r'.*\.txt$', '', txtfilters, txtpats),
535 557 ('web template', r'mercurial/templates/.*\.tmpl', '',
536 558 webtemplatefilters, webtemplatepats),
537 559 ('all except for .po', r'.*(?<!\.po)$', '',
538 560 allfilesfilters, allfilespats),
539 561 ]
540 562
541 563 def _preparepats():
542 564 for c in checks:
543 565 failandwarn = c[-1]
544 566 for pats in failandwarn:
545 567 for i, pseq in enumerate(pats):
546 568 # fix-up regexes for multi-line searches
547 569 p = pseq[0]
548 570 # \s doesn't match \n (done in two steps)
549 571 # first, we replace \s that appears in a set already
550 572 p = re.sub(r'\[\\s', r'[ \\t', p)
551 573 # now we replace other \s instances.
552 574 p = re.sub(r'(?<!(\\|\[))\\s', r'[ \\t]', p)
553 575 # [^...] doesn't match newline
554 576 p = re.sub(r'(?<!\\)\[\^', r'[^\\n', p)
555 577
556 578 pats[i] = (re.compile(p, re.MULTILINE),) + pseq[1:]
557 579 filters = c[3]
558 580 for i, flt in enumerate(filters):
559 581 filters[i] = re.compile(flt[0]), flt[1]
560 582
class norepeatlogger(object):
    """Error printer that shows a faulty line only once.

    The (filename, line number, line) triple of the last reported error
    is remembered so that consecutive errors on the same line print the
    location header and the line itself just one time.
    """

    def __init__(self):
        # triple of the most recently reported location, None at start
        self._lastseen = None

    def log(self, fname, lineno, line, msg, blame):
        """print an error related to a given line of a given file.

        The faulty line will also be printed but only once in the case
        of multiple errors.

        :fname: filename
        :lineno: line number
        :line: actual content of the line
        :msg: error message
        :blame: blame text shown in parentheses after the location;
                omitted from the header when empty
        """
        key = (fname, lineno, line)
        if key != self._lastseen:
            if blame:
                header = "%s:%d (%s):" % (fname, lineno, blame)
            else:
                header = "%s:%d:" % (fname, lineno)
            print(header)
            print(" > %s" % line)
            self._lastseen = key
        print(" " + msg)
585 607
586 608 _defaultlogger = norepeatlogger()
587 609
def getblame(f):
    """Return blame information for each line of file ``f``.

    Runs ``hg annotate -un`` on ``f`` and parses its output. Each output
    line is split at the first ':'; the part before it must consist of
    exactly two whitespace-separated fields, user and revision.

    Returns a list of ``(line, user, rev)`` tuples in output order, where
    ``line`` is the annotated text without the character following the
    colon and without the final character (the newline).

    Raises ValueError if an output line does not have that shape.
    """
    lines = []
    # NOTE(review): the file name is interpolated into a shell command
    # without quoting, so names containing spaces or shell
    # metacharacters will not work as intended.
    pipe = os.popen('hg annotate -un %s' % f)
    try:
        for l in pipe:
            start, line = l.split(':', 1)
            user, rev = start.split()
            lines.append((line[1:-1], user, rev))
    finally:
        # close explicitly so the pipe is released right away instead of
        # whenever the pipe object happens to be garbage collected
        pipe.close()
    return lines
595 617
def checkfile(f, logfunc=_defaultlogger.log, maxerr=None, warnings=False,
              blame=False, debug=False, lineno=True):
    """checks style and portability of a given file

    :f: filepath
    :logfunc: function used to report error
              logfunc(filename, linenumber, linecontent, errormessage,
              blame)
    :maxerr: number of errors to display per check before giving up on
             that check. A false value (the default) reports all errors
    :warnings: also apply the warning-level patterns of each check
    :blame: look up who last touched each faulty line via getblame()
    :debug: print which checks are applied or skipped
    :lineno: report real line numbers; when false, the false value itself
             is passed to logfunc in place of the line number

    return True if no error is found, False otherwise. A file that
    cannot be read returns True, and a file carrying the skip marker
    returns the string "Skip".
    """
    blamecache = None
    result = True

    try:
        with opentext(f) as fp:
            try:
                pre = fp.read()
            except UnicodeDecodeError as e:
                print("%s while reading %s" % (e, f))
                return result
    except IOError as e:
        print("Skipping %s, %s" % (f, str(e).split(':', 1)[0]))
        return result

    for name, match, magic, filters, pats in checks:
        post = pre # discard filtering result of previous check
        if debug:
            print(name, f)
        fc = 0
        if not (re.match(match, f) or (magic and re.search(magic, pre))):
            if debug:
                print("Skipping %s for %s it doesn't match %s" % (
                    name, match, f))
            continue
        if "no-" "check-code" in pre:
            # If you're looking at this line, it's because a file has:
            # no- check- code
            # but the reason to output skipping is to make life for
            # tests easier. So, instead of writing it with a normal
            # spelling, we write it with the expected spelling from
            # tests/test-check-code.t
            print("Skipping %s it has no-che?k-code (glob)" % f)
            return "Skip" # skip checking this file
        for p, r in filters:
            post = re.sub(p, r, post)
        nerrs = len(pats[0]) # nerr elements are errors
        if warnings:
            pats = pats[0] + pats[1]
        else:
            pats = pats[0]
        # print post # uncomment to show filtered version

        if debug:
            print("Checking %s for %s" % (name, f))

        prelines = None
        errors = []
        for i, pat in enumerate(pats):
            if len(pat) == 3:
                p, msg, ignore = pat
            else:
                p, msg = pat
                ignore = None
            if i >= nerrs:
                msg = "warning: " + msg

            # pos/n track the line containing the current match; matches
            # arrive in increasing position, so the scan never restarts
            pos = 0
            n = 0
            for m in p.finditer(post):
                if prelines is None:
                    # split lazily: only needed once something matched
                    prelines = pre.splitlines()
                    postlines = post.splitlines(True)

                start = m.start()
                while n < len(postlines):
                    step = len(postlines[n])
                    if pos + step > start:
                        break
                    pos += step
                    n += 1
                # report the unfiltered text of the matching line
                l = prelines[n]

                if ignore and re.search(ignore, l, re.MULTILINE):
                    if debug:
                        print("Skipping %s for %s:%s (ignore pattern)" % (
                            name, f, n))
                    continue
                bd = ""
                if blame:
                    bd = 'working directory'
                    # compare against None so that an empty blame result
                    # is cached too instead of re-running the annotate
                    # command for every reported error
                    if blamecache is None:
                        blamecache = getblame(f)
                    if n < len(blamecache):
                        bl, bu, br = blamecache[n]
                        if bl == l:
                            bd = '%s@%s' % (bu, br)

                errors.append((f, lineno and n + 1, l, msg, bd))
                result = False

        errors.sort()
        for e in errors:
            logfunc(*e)
            fc += 1
            if maxerr and fc >= maxerr:
                print(" (too many errors, giving up)")
                break

    return result
707 729
def main():
    """Command line entry point.

    Parses the options, determines the files to check (every entry
    matched by "*" in the current directory when no argument is given,
    a list read from stdin when the only argument is "-", otherwise the
    arguments themselves), prepares the patterns and checks each file.

    Returns 1 if checkfile() returned a false value for any file,
    0 otherwise.
    """
    parser = optparse.OptionParser("%prog [options] [files | -]")
    parser.add_option("-w", "--warnings", action="store_true",
                      help="include warning-level checks")
    parser.add_option("-p", "--per-file", type="int",
                      help="max warnings per file")
    parser.add_option("-b", "--blame", action="store_true",
                      help="use annotate to generate blame info")
    parser.add_option("", "--debug", action="store_true",
                      help="show debug information")
    parser.add_option("", "--nolineno", action="store_false",
                      dest='lineno', help="don't show line numbers")
    parser.set_defaults(per_file=15, warnings=False, blame=False,
                        debug=False, lineno=True)
    options, args = parser.parse_args()

    if not args:
        targets = glob.glob("*")
    elif args == ['-']:
        # read file list from stdin
        targets = sys.stdin.read().splitlines()
    else:
        targets = args

    _preparepats()

    status = 0
    for path in targets:
        passed = checkfile(path, maxerr=options.per_file,
                           warnings=options.warnings, blame=options.blame,
                           debug=options.debug, lineno=options.lineno)
        if not passed:
            status = 1
    return status
742 764
743 765 if __name__ == "__main__":
744 766 sys.exit(main())
General Comments 0
You need to be logged in to leave comments. Login now