##// END OF EJS Templates
check-code: use raw string...
Gregory Szorc -
r41685:7d1798ec default
parent child Browse files
Show More
@@ -1,743 +1,743 b''
1 1 #!/usr/bin/env python
2 2 #
3 3 # check-code - a style and portability checker for Mercurial
4 4 #
5 5 # Copyright 2010 Matt Mackall <mpm@selenic.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """style and portability checker for Mercurial
11 11
12 12 when a rule triggers wrong, do one of the following (prefer one from top):
13 13 * do the work-around the rule suggests
14 14 * doublecheck that it is a false match
15 15 * improve the rule pattern
16 16 * add an ignore pattern to the rule (3rd arg) which matches your good line
17 17 (you can append a short comment and match this, like: #re-raises)
18 18 * change the pattern to a warning and list the exception in test-check-code-hg
19 19 * ONLY use no--check-code for skipping entire files from external sources
20 20 """
21 21
22 22 from __future__ import absolute_import, print_function
23 23 import glob
24 24 import keyword
25 25 import optparse
26 26 import os
27 27 import re
28 28 import sys
# Text-mode file opener: on Python 3 decode as latin1 so that every byte
# maps to exactly one character; on Python 2 the builtin open is used as is.
if sys.version_info[0] >= 3:
    def opentext(f):
        """Open file *f* for reading as latin1-decoded text."""
        return open(f, encoding='latin1')
else:
    opentext = open

# Python 3 has no xrange builtin; alias it to range there.
try:
    xrange
except NameError:
    xrange = range
# re2 is an optional accelerator; fall back to the stdlib when it is absent.
try:
    import re2
except ImportError:
    re2 = None

def compilere(pat, multiline=False):
    """Compile *pat*, preferring re2 when it is importable.

    :pat: regular expression source
    :multiline: when true, prefix the pattern with the (?m) inline flag

    If re2 is unavailable, or rejects the pattern with re2.error, the
    pattern is compiled with the stdlib re module instead.
    """
    source = '(?m)' + pat if multiline else pat
    if not re2:
        return re.compile(source)
    try:
        return re2.compile(source)
    except re2.error:
        return re.compile(source)
52 52
# check "rules depending on implementation of repquote()" in each
# patterns (especially pypats), before changing around repquote()

# characters that keep a dedicated placeholder instead of the generic
# 'x' (alphabetic), 'n' (digit) or 'o' (anything else)
_repquotefixedmap = {
    ' ': ' ',
    '\n': '\n',
    '.': 'p',
    ':': 'q',
    '%': '%',
    '\\': 'b',
    '*': 'A',
    '+': 'P',
    '-': 'M',
}

def _repquoteencodechr(i):
    """Return the one-character placeholder for character code *i*."""
    if i > 255:
        return 'u'
    c = chr(i)
    try:
        return _repquotefixedmap[c]
    except KeyError:
        pass
    if c.isalpha():
        return 'x'
    return 'n' if c.isdigit() else 'o'

# translation table indexed by character code (0..255)
_repquotett = ''.join(map(_repquoteencodechr, range(256)))

def repquote(m):
    """Filter callback: replace the body of a quoted string by placeholders.

    *m* must provide the named groups 'quote' and 'text'. The text is run
    through the placeholder table and re-wrapped in the original quote.
    """
    quote = m.group('quote')
    masked = m.group('text').translate(_repquotett)
    return quote + masked + quote
74 74
def reppython(m):
    """Filter callback for Python source: mask comments and strings.

    When the 'comment' group matched, the comment (without its trailing
    whitespace) is replaced by the same number of '#' characters and the
    trailing whitespace is kept. Otherwise the match is a quoted string
    and is handed to repquote().
    """
    comment = m.group('comment')
    if not comment:
        return repquote(m)
    width = len(comment.rstrip())
    return "#" * width + comment[width:]
81 81
def repcomment(m):
    """Filter callback: keep group 1 and overwrite group 2 with '#'s."""
    prefix, comment = m.group(1), m.group(2)
    return prefix + "#" * len(comment)
84 84
def repccomment(m):
    """Filter callback: mask the body of a C comment.

    Group 1 is kept, and in group 2 every non-whitespace character, as
    well as a space directly following a newline, becomes 'x'. The
    closing "*/" is appended again.
    """
    opener = m.group(1)
    masked = re.sub(r"((?<=\n) )|\S", "x", m.group(2))
    return "".join([opener, masked, "*/"])
88 88
def repcallspaces(m):
    """Filter callback: drop whitespace that follows a newline in group 2.

    Group 1 is returned unchanged in front of the stripped text.
    """
    head = m.group(1)
    body = re.sub(r"\n\s+", "\n", m.group(2))
    return head + body
92 92
def repinclude(m):
    """Filter callback: replace what follows group 1 by "<foo>"."""
    return "%s<foo>" % m.group(1)
95 95
def rephere(m):
    """Filter callback: mask group 2 by turning non-whitespace into 'x'.

    Group 1 is returned unchanged in front of the masked text.
    """
    marker = m.group(1)
    masked = re.sub(r"\S", "x", m.group(2))
    return marker + masked
99 99
100 100
101 101 testpats = [
102 102 [
103 103 (r'\b(push|pop)d\b', "don't use 'pushd' or 'popd', use 'cd'"),
104 104 (r'\W\$?\(\([^\)\n]*\)\)', "don't use (()) or $(()), use 'expr'"),
105 105 (r'grep.*-q', "don't use 'grep -q', redirect to /dev/null"),
106 106 (r'(?<!hg )grep.* -a', "don't use 'grep -a', use in-line python"),
107 107 (r'sed.*-i', "don't use 'sed -i', use a temporary file"),
108 108 (r'\becho\b.*\\n', "don't use 'echo \\n', use printf"),
109 109 (r'echo -n', "don't use 'echo -n', use printf"),
110 110 (r'(^|\|\s*)\bwc\b[^|]*$\n(?!.*\(re\))', "filter wc output"),
111 111 (r'head -c', "don't use 'head -c', use 'dd'"),
112 112 (r'tail -n', "don't use the '-n' option to tail, just use '-<num>'"),
113 113 (r'sha1sum', "don't use sha1sum, use $TESTDIR/md5sum.py"),
114 114 (r'\bls\b.*-\w*R', "don't use 'ls -R', use 'find'"),
115 115 (r'printf.*[^\\]\\([1-9]|0\d)', r"don't use 'printf \NNN', use Python"),
116 116 (r'printf.*[^\\]\\x', "don't use printf \\x, use Python"),
117 117 (r'\$\(.*\)', "don't use $(expr), use `expr`"),
118 118 (r'rm -rf \*', "don't use naked rm -rf, target a directory"),
119 119 (r'\[[^\]]+==', '[ foo == bar ] is a bashism, use [ foo = bar ] instead'),
120 120 (r'(^|\|\s*)grep (-\w\s+)*[^|]*[(|]\w',
121 121 "use egrep for extended grep syntax"),
122 122 (r'(^|\|\s*)e?grep .*\\S', "don't use \\S in regular expression"),
123 123 (r'(?<!!)/bin/', "don't use explicit paths for tools"),
124 124 (r'#!.*/bash', "don't use bash in shebang, use sh"),
125 125 (r'[^\n]\Z', "no trailing newline"),
126 126 (r'export .*=', "don't export and assign at once"),
127 127 (r'^source\b', "don't use 'source', use '.'"),
128 128 (r'touch -d', "don't use 'touch -d', use 'touch -t' instead"),
129 129 (r'\bls +[^|\n-]+ +-', "options to 'ls' must come before filenames"),
130 130 (r'[^>\n]>\s*\$HGRCPATH', "don't overwrite $HGRCPATH, append to it"),
131 131 (r'^stop\(\)', "don't use 'stop' as a shell function name"),
132 132 (r'(\[|\btest\b).*-e ', "don't use 'test -e', use 'test -f'"),
133 133 (r'\[\[\s+[^\]]*\]\]', "don't use '[[ ]]', use '[ ]'"),
134 134 (r'^alias\b.*=', "don't use alias, use a function"),
135 135 (r'if\s*!', "don't use '!' to negate exit status"),
136 136 (r'/dev/u?random', "don't use entropy, use /dev/zero"),
137 137 (r'do\s*true;\s*done', "don't use true as loop body, use sleep 0"),
138 138 (r'sed (-e )?\'(\d+|/[^/]*/)i(?!\\\n)',
139 139 "put a backslash-escaped newline after sed 'i' command"),
140 140 (r'^diff *-\w*[uU].*$\n(^ \$ |^$)', "prefix diff -u/-U with cmp"),
141 141 (r'^\s+(if)? diff *-\w*[uU]', "prefix diff -u/-U with cmp"),
142 142 (r'[\s="`\']python\s(?!bindings)', "don't use 'python', use '$PYTHON'"),
143 143 (r'seq ', "don't use 'seq', use $TESTDIR/seq.py"),
144 144 (r'\butil\.Abort\b', "directly use error.Abort"),
145 145 (r'\|&', "don't use |&, use 2>&1"),
146 146 (r'\w = +\w', "only one space after = allowed"),
147 147 (r'\bsed\b.*[^\\]\\n', "don't use 'sed ... \\n', use a \\ and a newline"),
148 148 (r'env.*-u', "don't use 'env -u VAR', use 'unset VAR'"),
149 149 (r'cp.* -r ', "don't use 'cp -r', use 'cp -R'"),
150 150 (r'grep.* -[ABC]', "don't use grep's context flags"),
151 151 (r'find.*-printf',
152 152 "don't use 'find -printf', it doesn't exist on BSD find(1)"),
153 153 (r'\$RANDOM ', "don't use bash-only $RANDOM to generate random values"),
154 154 ],
155 155 # warnings
156 156 [
157 157 (r'^function', "don't use 'function', use old style"),
158 158 (r'^diff.*-\w*N', "don't use 'diff -N'"),
159 159 (r'\$PWD|\${PWD}', "don't use $PWD, use `pwd`"),
160 160 (r'^([^"\'\n]|("[^"\n]*")|(\'[^\'\n]*\'))*\^', "^ must be quoted"),
161 161 (r'kill (`|\$\()', "don't use kill, use killdaemons.py")
162 162 ]
163 163 ]
164 164
165 165 testfilters = [
166 166 (r"( *)(#([^!][^\n]*\S)?)", repcomment),
167 167 (r"<<(\S+)((.|\n)*?\n\1)", rephere),
168 168 ]
169 169
170 170 uprefix = r"^ \$ "
171 171 utestpats = [
172 172 [
173 173 (r'^(\S.*|| [$>] \S.*)[ \t]\n', "trailing whitespace on non-output"),
174 174 (uprefix + r'.*\|\s*sed[^|>\n]*\n',
175 175 "use regex test output patterns instead of sed"),
176 176 (uprefix + r'(true|exit 0)', "explicit zero exit unnecessary"),
177 177 (uprefix + r'.*(?<!\[)\$\?', "explicit exit code checks unnecessary"),
178 178 (uprefix + r'.*\|\| echo.*(fail|error)',
179 179 "explicit exit code checks unnecessary"),
180 180 (uprefix + r'set -e', "don't use set -e"),
181 181 (uprefix + r'(\s|fi\b|done\b)', "use > for continued lines"),
182 182 (uprefix + r'.*:\.\S*/', "x:.y in a path does not work on msys, rewrite "
183 183 "as x://.y, or see `hg log -k msys` for alternatives", r'-\S+:\.|' #-Rxxx
184 184 '# no-msys'), # in test-pull.t which is skipped on windows
185 185 (r'^ [^$>].*27\.0\.0\.1',
186 186 'use $LOCALIP not an explicit loopback address'),
187 187 (r'^ (?![>$] ).*\$LOCALIP.*[^)]$',
188 188 'mark $LOCALIP output lines with (glob) to help tests in BSD jails'),
189 189 (r'^ (cat|find): .*: \$ENOENT\$',
190 190 'use test -f to test for file existence'),
191 191 (r'^ diff -[^ -]*p',
192 192 "don't use (external) diff with -p for portability"),
193 193 (r' readlink ', 'use readlink.py instead of readlink'),
194 194 (r'^ [-+][-+][-+] .* [-+]0000 \(glob\)',
195 195 "glob timezone field in diff output for portability"),
196 196 (r'^ @@ -[0-9]+ [+][0-9]+,[0-9]+ @@',
197 197 "use '@@ -N* +N,n @@ (glob)' style chunk header for portability"),
198 198 (r'^ @@ -[0-9]+,[0-9]+ [+][0-9]+ @@',
199 199 "use '@@ -N,n +N* @@ (glob)' style chunk header for portability"),
200 200 (r'^ @@ -[0-9]+ [+][0-9]+ @@',
201 201 "use '@@ -N* +N* @@ (glob)' style chunk header for portability"),
202 202 (uprefix + r'hg( +-[^ ]+( +[^ ]+)?)* +extdiff'
203 203 r'( +(-[^ po-]+|--(?!program|option)[^ ]+|[^-][^ ]*))*$',
204 204 "use $RUNTESTDIR/pdiff via extdiff (or -o/-p for false-positives)"),
205 205 ],
206 206 # warnings
207 207 [
208 208 (r'^ (?!.*\$LOCALIP)[^*?/\n]* \(glob\)$',
209 209 "glob match with no glob string (?, *, /, and $LOCALIP)"),
210 210 ]
211 211 ]
212 212
# transform plain test rules to unified test's
def _tounifiedrule(rule):
    """Return *rule* with its pattern rewritten for unified test lines.

    A pattern anchored with '^' is re-anchored behind the command prefix;
    any other pattern may appear anywhere after the prefix. The message
    and any further rule elements are carried over unchanged.
    """
    pattern = rule[0]
    if pattern.startswith(r'^'):
        pattern = r"^ [$>] (%s)" % pattern[1:]
    else:
        pattern = r"^ [$>] .*(%s)" % pattern
    return (pattern, rule[1]) + rule[2:]

# index 0 holds the error rules, index 1 the warning rules
for i in [0, 1]:
    utestpats[i].extend(_tounifiedrule(tp) for tp in testpats[i])

# don't transform the following rules:
# " > \t" and " \t" should be allowed in unified tests
testpats[0].append((r'^( *)\t', "don't use tabs to indent"))
utestpats[0].append((r'^( ?)\t', "don't use tabs to indent"))
228 228
229 229 utestfilters = [
230 230 (r"<<(\S+)((.|\n)*?\n > \1)", rephere),
231 231 (r"( +)(#([^!][^\n]*\S)?)", repcomment),
232 232 ]
233 233
234 234 pypats = [
235 235 [
236 236 (r'^\s*def\s*\w+\s*\(.*,\s*\(',
237 237 "tuple parameter unpacking not available in Python 3+"),
238 238 (r'lambda\s*\(.*,.*\)',
239 239 "tuple parameter unpacking not available in Python 3+"),
240 240 (r'(?<!def)\s+(cmp)\(', "cmp is not available in Python 3+"),
241 241 (r'(?<!\.)\breduce\s*\(.*', "reduce is not available in Python 3+"),
242 242 (r'\bdict\(.*=', 'dict() is different in Py2 and 3 and is slower than {}',
243 243 'dict-from-generator'),
244 244 (r'\.has_key\b', "dict.has_key is not available in Python 3+"),
245 245 (r'\s<>\s', '<> operator is not available in Python 3+, use !='),
246 246 (r'^\s*\t', "don't use tabs"),
247 247 (r'\S;\s*\n', "semicolon"),
248 248 (r'[^_]_\([ \t\n]*(?:"[^"]+"[ \t\n+]*)+%', "don't use % inside _()"),
249 249 (r"[^_]_\([ \t\n]*(?:'[^']+'[ \t\n+]*)+%", "don't use % inside _()"),
250 250 (r'(\w|\)),\w', "missing whitespace after ,"),
251 251 (r'(\w|\))[+/*\-<>]\w', "missing whitespace in expression"),
252 252 (r'^\s+(\w|\.)+=\w[^,()\n]*$', "missing whitespace in assignment"),
253 253 (r'\w\s=\s\s+\w', "gratuitous whitespace after ="),
254 254 ((
255 255 # a line ending with a colon, potentially with trailing comments
256 256 r':([ \t]*#[^\n]*)?\n'
257 257 # one that is not a pass and not only a comment
258 258 r'(?P<indent>[ \t]+)[^#][^\n]+\n'
259 259 # more lines at the same indent level
260 260 r'((?P=indent)[^\n]+\n)*'
261 261 # a pass at the same indent level, which is bogus
262 262 r'(?P=indent)pass[ \t\n#]'
263 263 ), 'omit superfluous pass'),
264 264 (r'.{81}', "line too long"),
265 265 (r'[^\n]\Z', "no trailing newline"),
266 266 (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
267 267 # (r'^\s+[^_ \n][^_. \n]+_[^_\n]+\s*=',
268 268 # "don't use underbars in identifiers"),
269 269 (r'^\s+(self\.)?[A-Za-z][a-z0-9]+[A-Z]\w* = ',
270 270 "don't use camelcase in identifiers", r'#.*camelcase-required'),
271 271 (r'^\s*(if|while|def|class|except|try)\s[^[\n]*:\s*[^\\n]#\s]+',
272 272 "linebreak after :"),
273 273 (r'class\s[^( \n]+:', "old-style class, use class foo(object)",
274 274 r'#.*old-style'),
275 275 (r'class\s[^( \n]+\(\):',
276 276 "class foo() creates old style object, use class foo(object)",
277 277 r'#.*old-style'),
278 278 (r'\b(%s)\(' % '|'.join(k for k in keyword.kwlist
279 279 if k not in ('print', 'exec')),
280 280 "Python keyword is not a function"),
281 281 (r',]', "unneeded trailing ',' in list"),
282 282 # (r'class\s[A-Z][^\(]*\((?!Exception)',
283 283 # "don't capitalize non-exception classes"),
284 284 # (r'in range\(', "use xrange"),
285 285 # (r'^\s*print\s+', "avoid using print in core and extensions"),
286 286 (r'[\x80-\xff]', "non-ASCII character literal"),
287 287 (r'("\')\.format\(', "str.format() has no bytes counterpart, use %"),
288 288 (r'^\s*(%s)\s\s' % '|'.join(keyword.kwlist),
289 289 "gratuitous whitespace after Python keyword"),
290 290 (r'([\(\[][ \t]\S)|(\S[ \t][\)\]])', "gratuitous whitespace in () or []"),
291 291 # (r'\s\s=', "gratuitous whitespace before ="),
292 292 (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\S',
293 293 "missing whitespace around operator"),
294 294 (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\s',
295 295 "missing whitespace around operator"),
296 296 (r'\s(\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\S',
297 297 "missing whitespace around operator"),
298 298 (r'[^^+=*/!<>&| %-](\s=|=\s)[^= ]',
299 299 "wrong whitespace around ="),
300 300 (r'\([^()]*( =[^=]|[^<>!=]= )',
301 301 "no whitespace around = for named parameters"),
302 302 (r'raise Exception', "don't raise generic exceptions"),
303 303 (r'raise [^,(]+, (\([^\)]+\)|[^,\(\)]+)$',
304 304 "don't use old-style two-argument raise, use Exception(message)"),
305 305 (r' is\s+(not\s+)?["\'0-9-]', "object comparison with literal"),
306 306 (r' [=!]=\s+(True|False|None)',
307 307 "comparison with singleton, use 'is' or 'is not' instead"),
308 308 (r'^\s*(while|if) [01]:',
309 309 "use True/False for constant Boolean expression"),
310 310 (r'^\s*if False(:| +and)', 'Remove code instead of using `if False`'),
311 311 (r'(?:(?<!def)\s+|\()hasattr\(',
312 312 'hasattr(foo, bar) is broken on py2, use util.safehasattr(foo, bar) '
313 313 'instead', r'#.*hasattr-py3-only'),
314 314 (r'opener\([^)]*\).read\(',
315 315 "use opener.read() instead"),
316 316 (r'opener\([^)]*\).write\(',
317 317 "use opener.write() instead"),
318 318 (r'[\s\(](open|file)\([^)]*\)\.read\(',
319 319 "use util.readfile() instead"),
320 320 (r'[\s\(](open|file)\([^)]*\)\.write\(',
321 321 "use util.writefile() instead"),
322 322 (r'^[\s\(]*(open(er)?|file)\([^)]*\)(?!\.close\(\))',
323 323 "always assign an opened file to a variable, and close it afterwards"),
324 324 (r'[\s\(](open|file)\([^)]*\)\.(?!close\(\))',
325 325 "always assign an opened file to a variable, and close it afterwards"),
326 326 (r'(?i)descend[e]nt', "the proper spelling is descendAnt"),
327 327 (r'\.debug\(\_', "don't mark debug messages for translation"),
328 328 (r'\.strip\(\)\.split\(\)', "no need to strip before splitting"),
329 329 (r'^\s*except\s*:', "naked except clause", r'#.*re-raises'),
330 330 (r'^\s*except\s([^\(,]+|\([^\)]+\))\s*,',
331 331 'legacy exception syntax; use "as" instead of ","'),
332 332 (r':\n( )*( ){1,3}[^ ]', "must indent 4 spaces"),
333 333 (r'release\(.*wlock, .*lock\)', "wrong lock release order"),
334 334 (r'\bdef\s+__bool__\b', "__bool__ should be __nonzero__ in Python 2"),
335 335 (r'os\.path\.join\(.*, *(""|\'\')\)',
336 336 "use pathutil.normasprefix(path) instead of os.path.join(path, '')"),
337 337 (r'\s0[0-7]+\b', 'legacy octal syntax; use "0o" prefix instead of "0"'),
338 338 # XXX only catch mutable arguments on the first line of the definition
339 339 (r'def.*[( ]\w+=\{\}', "don't use mutable default arguments"),
340 340 (r'\butil\.Abort\b', "directly use error.Abort"),
341 341 (r'^@(\w*\.)?cachefunc', "module-level @cachefunc is risky, please avoid"),
342 342 (r'^import atexit', "don't use atexit, use ui.atexit"),
343 343 (r'^import Queue', "don't use Queue, use pycompat.queue.Queue + "
344 344 "pycompat.queue.Empty"),
345 345 (r'^import cStringIO', "don't use cStringIO.StringIO, use util.stringio"),
346 346 (r'^import urllib', "don't use urllib, use util.urlreq/util.urlerr"),
347 347 (r'^import SocketServer', "don't use SockerServer, use util.socketserver"),
348 348 (r'^import urlparse', "don't use urlparse, use util.urlreq"),
349 349 (r'^import xmlrpclib', "don't use xmlrpclib, use util.xmlrpclib"),
350 350 (r'^import cPickle', "don't use cPickle, use util.pickle"),
351 351 (r'^import pickle', "don't use pickle, use util.pickle"),
352 352 (r'^import httplib', "don't use httplib, use util.httplib"),
353 353 (r'^import BaseHTTPServer', "use util.httpserver instead"),
354 354 (r'^(from|import) mercurial\.(cext|pure|cffi)',
355 355 "use mercurial.policy.importmod instead"),
356 356 (r'\.next\(\)', "don't use .next(), use next(...)"),
357 357 (r'([a-z]*).revision\(\1\.node\(',
358 358 "don't convert rev to node before passing to revision(nodeorrev)"),
359 359 (r'platform\.system\(\)', "don't use platform.system(), use pycompat"),
360 360
361 361 # rules depending on implementation of repquote()
362 362 (r' x+[xpqo%APM][\'"]\n\s+[\'"]x',
363 363 'string join across lines with no space'),
364 364 (r'''(?x)ui\.(status|progress|write|note|warn)\(
365 365 [ \t\n#]*
366 366 (?# any strings/comments might precede a string, which
367 367 # contains translatable message)
368 368 ((['"]|\'\'\'|""")[ \npq%bAPMxno]*(['"]|\'\'\'|""")[ \t\n#]+)*
369 369 (?# sequence consisting of below might precede translatable message
370 370 # - formatting string: "% 10s", "%05d", "% -3.2f", "%*s", "%%" ...
371 371 # - escaped character: "\\", "\n", "\0" ...
372 372 # - character other than '%', 'b' as '\', and 'x' as alphabet)
373 373 (['"]|\'\'\'|""")
374 374 ((%([ n]?[PM]?([np]+|A))?x)|%%|b[bnx]|[ \nnpqAPMo])*x
375 375 (?# this regexp can't use [^...] style,
376 376 # because _preparepats forcibly adds "\n" into [^...],
377 377 # even though this regexp wants match it against "\n")''',
378 378 "missing _() in ui message (use () to hide false-positives)"),
379 379 ],
380 380 # warnings
381 381 [
382 382 # rules depending on implementation of repquote()
383 383 (r'(^| )pp +xxxxqq[ \n][^\n]', "add two newlines after '.. note::'"),
384 384 ]
385 385 ]
386 386
387 387 pyfilters = [
388 388 (r"""(?msx)(?P<comment>\#.*?$)|
389 389 ((?P<quote>('''|\"\"\"|(?<!')'(?!')|(?<!")"(?!")))
390 390 (?P<text>(([^\\]|\\.)*?))
391 391 (?P=quote))""", reppython),
392 392 ]
393 393
394 394 # non-filter patterns
395 395 pynfpats = [
396 396 [
397 397 (r'pycompat\.osname\s*[=!]=\s*[\'"]nt[\'"]', "use pycompat.iswindows"),
398 398 (r'pycompat\.osname\s*[=!]=\s*[\'"]posix[\'"]', "use pycompat.isposix"),
399 399 (r'pycompat\.sysplatform\s*[!=]=\s*[\'"]darwin[\'"]',
400 400 "use pycompat.isdarwin"),
401 401 ],
402 402 # warnings
403 403 [],
404 404 ]
405 405
406 406 # extension non-filter patterns
407 407 pyextnfpats = [
408 408 [(r'^"""\n?[A-Z]', "don't capitalize docstring title")],
409 409 # warnings
410 410 [],
411 411 ]
412 412
413 413 txtfilters = []
414 414
# rules for plain text files: first list holds errors, second warnings
txtpats = [
    [
        # raw string: '\s' is not a valid escape in a normal string literal
        (r'\s$', 'trailing whitespace'),
        # the '\n' escapes below are real newline characters in the pattern
        ('.. note::[ \n][^\n]', 'add two newlines after note::')
    ],
    []
]
422 422
423 423 cpats = [
424 424 [
425 425 (r'//', "don't use //-style comments"),
426 426 (r'\S\t', "don't use tabs except for indent"),
427 427 (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
428 428 (r'.{81}', "line too long"),
429 429 (r'(while|if|do|for)\(', "use space after while/if/do/for"),
430 430 (r'return\(', "return is not a function"),
431 431 (r' ;', "no space before ;"),
432 432 (r'[^;] \)', "no space before )"),
433 433 (r'[)][{]', "space between ) and {"),
434 434 (r'\w+\* \w+', "use int *foo, not int* foo"),
435 435 (r'\W\([^\)]+\) \w+', "use (int)foo, not (int) foo"),
436 436 (r'\w+ (\+\+|--)', "use foo++, not foo ++"),
437 437 (r'\w,\w', "missing whitespace after ,"),
438 438 (r'^[^#]\w[+/*]\w', "missing whitespace in expression"),
439 439 (r'\w\s=\s\s+\w', "gratuitous whitespace after ="),
440 440 (r'^#\s+\w', "use #foo, not # foo"),
441 441 (r'[^\n]\Z', "no trailing newline"),
442 442 (r'^\s*#import\b', "use only #include in standard C code"),
443 443 (r'strcpy\(', "don't use strcpy, use strlcpy or memcpy"),
444 444 (r'strcat\(', "don't use strcat"),
445 445
446 446 # rules depending on implementation of repquote()
447 447 ],
448 448 # warnings
449 449 [
450 450 # rules depending on implementation of repquote()
451 451 ]
452 452 ]
453 453
454 454 cfilters = [
455 455 (r'(/\*)(((\*(?!/))|[^*])*)\*/', repccomment),
456 456 (r'''(?P<quote>(?<!")")(?P<text>([^"]|\\")+)"(?!")''', repquote),
457 457 (r'''(#\s*include\s+<)([^>]+)>''', repinclude),
458 458 (r'(\()([^)]+\))', repcallspaces),
459 459 ]
460 460
461 461 inutilpats = [
462 462 [
463 463 (r'\bui\.', "don't use ui in util"),
464 464 ],
465 465 # warnings
466 466 []
467 467 ]
468 468
469 469 inrevlogpats = [
470 470 [
471 471 (r'\brepo\.', "don't use repo in revlog"),
472 472 ],
473 473 # warnings
474 474 []
475 475 ]
476 476
477 477 webtemplatefilters = []
478 478
479 479 webtemplatepats = [
480 480 [],
481 481 [
482 482 (r'{desc(\|(?!websub|firstline)[^\|]*)+}',
483 483 'follow desc keyword with either firstline or websub'),
484 484 ]
485 485 ]
486 486
487 487 allfilesfilters = []
488 488
489 489 allfilespats = [
490 490 [
491 491 (r'(http|https)://[a-zA-Z0-9./]*selenic.com/',
492 492 'use mercurial-scm.org domain URL'),
493 493 (r'mercurial@selenic\.com',
494 494 'use mercurial-scm.org domain for mercurial ML address'),
495 495 (r'mercurial-devel@selenic\.com',
496 496 'use mercurial-scm.org domain for mercurial-devel ML address'),
497 497 ],
498 498 # warnings
499 499 [],
500 500 ]
501 501
502 502 py3pats = [
503 503 [
504 504 (r'os\.environ', "use encoding.environ instead (py3)", r'#.*re-exports'),
505 505 (r'os\.name', "use pycompat.osname instead (py3)"),
506 506 (r'os\.getcwd', "use encoding.getcwd instead (py3)", r'#.*re-exports'),
507 507 (r'os\.sep', "use pycompat.ossep instead (py3)"),
508 508 (r'os\.pathsep', "use pycompat.ospathsep instead (py3)"),
509 509 (r'os\.altsep', "use pycompat.osaltsep instead (py3)"),
510 510 (r'sys\.platform', "use pycompat.sysplatform instead (py3)"),
511 511 (r'getopt\.getopt', "use pycompat.getoptb instead (py3)"),
512 512 (r'os\.getenv', "use encoding.environ.get instead"),
513 513 (r'os\.setenv', "modifying the environ dict is not preferred"),
514 514 (r'(?<!pycompat\.)xrange', "use pycompat.xrange instead (py3)"),
515 515 ],
516 516 # warnings
517 517 [],
518 518 ]
519 519
520 520 checks = [
521 521 ('python', r'.*\.(py|cgi)$', r'^#!.*python', pyfilters, pypats),
522 522 ('python', r'.*\.(py|cgi)$', r'^#!.*python', [], pynfpats),
523 523 ('python', r'.*hgext.*\.py$', '', [], pyextnfpats),
524 524 ('python 3', r'.*(hgext|mercurial)/(?!demandimport|policy|pycompat).*\.py',
525 525 '', pyfilters, py3pats),
526 526 ('test script', r'(.*/)?test-[^.~]*$', '', testfilters, testpats),
527 527 ('c', r'.*\.[ch]$', '', cfilters, cpats),
528 528 ('unified test', r'.*\.t$', '', utestfilters, utestpats),
529 529 ('layering violation repo in revlog', r'mercurial/revlog\.py', '',
530 530 pyfilters, inrevlogpats),
531 531 ('layering violation ui in util', r'mercurial/util\.py', '', pyfilters,
532 532 inutilpats),
533 533 ('txt', r'.*\.txt$', '', txtfilters, txtpats),
534 534 ('web template', r'mercurial/templates/.*\.tmpl', '',
535 535 webtemplatefilters, webtemplatepats),
536 536 ('all except for .po', r'.*(?<!\.po)$', '',
537 537 allfilesfilters, allfilespats),
538 538 ]
539 539
def _preparepats():
    """Compile every pattern and filter referenced by ``checks`` in place.

    Pattern sources are first adjusted for multi-line searching, then
    compiled with re.MULTILINE. Filter patterns are compiled unchanged.
    """
    for check in checks:
        for patlist in check[-1]:
            for idx, entry in enumerate(patlist):
                source = entry[0]
                # \s must not match \n (done in two steps)
                # step 1: \s at the start of a character set
                source = re.sub(r'\[\\s', r'[ \\t', source)
                # step 2: every remaining stand-alone \s
                source = re.sub(r'(?<!(\\|\[))\\s', r'[ \\t]', source)
                # negated sets must not match a newline either
                source = re.sub(r'(?<!\\)\[\^', r'[^\\n', source)

                compiled = re.compile(source, re.MULTILINE)
                patlist[idx] = (compiled,) + entry[1:]

        filterlist = check[3]
        for idx, flt in enumerate(filterlist):
            filterlist[idx] = re.compile(flt[0]), flt[1]
559 559
class norepeatlogger(object):
    """Error printer that shows each faulty line only once in a row."""

    def __init__(self):
        # (fname, lineno, line) of the most recently printed header
        self._lastseen = None

    def log(self, fname, lineno, line, msg, blame):
        """print an error related to a given line of a given file.

        The location header and the faulty line are printed only when
        they differ from the previous call, so several errors on the
        same line share a single header.

        :fname: filename
        :lineno: line number
        :line: actual content of the line
        :msg: error message
        :blame: blame information added to the header when not empty
        """
        key = (fname, lineno, line)
        if key != self._lastseen:
            if blame:
                header = "%s:%d (%s):" % (fname, lineno, blame)
            else:
                header = "%s:%d:" % (fname, lineno)
            print(header)
            print(" > %s" % line)
            self._lastseen = key
        print(" " + msg)
584 584
585 585 _defaultlogger = norepeatlogger()
586 586
def getblame(f):
    """Return annotate information for every line of file *f*.

    Runs ``hg annotate -un`` on *f* and returns a list of
    (line, user, rev) tuples in output order. ``line`` is the text after
    the first ':' with its first and last character removed.
    """
    lines = []
    # NOTE(review): f is interpolated into a shell command without
    # quoting, so names containing spaces or shell metacharacters will
    # not be handled correctly.
    # The pipe is closed explicitly instead of leaving it to the
    # garbage collector.
    with os.popen('hg annotate -un %s' % f) as pipe:
        for l in pipe:
            start, line = l.split(':', 1)
            user, rev = start.split()
            lines.append((line[1:-1], user, rev))
    return lines
594 594
def checkfile(f, logfunc=_defaultlogger.log, maxerr=None, warnings=False,
              blame=False, debug=False, lineno=True):
    """checks style and portability of a given file

    :f: filepath
    :logfunc: function used to report error
              logfunc(filename, linenumber, linecontent, errormessage,
              blame)
    :maxerr: number of error to display before aborting.
             Set to a false value (default) to report all errors
    :warnings: also apply the warning-level patterns of each check
    :blame: look up who last touched each reported line with getblame()
    :debug: print which checks are applied or skipped
    :lineno: when true, report 1-based line numbers; otherwise the false
             value itself is passed to logfunc as the line number

    return True if no error is found, False otherwise. A file that cannot
    be read is reported and treated as clean. A file containing the skip
    marker makes the function return the (true) string "Skip".
    """
    blamecache = None
    result = True

    try:
        with opentext(f) as fp:
            try:
                pre = fp.read()
            except UnicodeDecodeError as e:
                print("%s while reading %s" % (e, f))
                return result
    except IOError as e:
        print("Skipping %s, %s" % (f, str(e).split(':', 1)[0]))
        return result

    for name, match, magic, filters, pats in checks:
        post = pre # discard filtering result of previous check
        if debug:
            print(name, f)
        fc = 0
        if not (re.match(match, f) or (magic and re.search(magic, pre))):
            if debug:
                # name the file first, then the pattern it failed to match
                print("Skipping %s for %s it doesn't match %s" % (
                    name, f, match))
            continue
        if "no-" "check-code" in pre:
            # If you're looking at this line, it's because a file has:
            # no- check- code
            # but the reason to output skipping is to make life for
            # tests easier. So, instead of writing it with a normal
            # spelling, we write it with the expected spelling from
            # tests/test-check-code.t
            print("Skipping %s it has no-che?k-code (glob)" % f)
            return "Skip" # skip checking this file
        for p, r in filters:
            post = re.sub(p, r, post)
        nerrs = len(pats[0]) # nerr elements are errors
        if warnings:
            pats = pats[0] + pats[1]
        else:
            pats = pats[0]
        # print post # uncomment to show filtered version

        if debug:
            print("Checking %s for %s" % (name, f))

        prelines = None
        errors = []
        for i, pat in enumerate(pats):
            if len(pat) == 3:
                p, msg, ignore = pat
            else:
                p, msg = pat
                ignore = None
            if i >= nerrs:
                msg = "warning: " + msg

            # pos/n track the offset and index of the line containing the
            # current match; matches arrive in increasing offset order, so
            # the scan never has to restart from the top of the file
            pos = 0
            n = 0
            for m in p.finditer(post):
                if prelines is None:
                    # split lazily: most files produce no match at all
                    prelines = pre.splitlines()
                    postlines = post.splitlines(True)

                start = m.start()
                while n < len(postlines):
                    step = len(postlines[n])
                    if pos + step > start:
                        break
                    pos += step
                    n += 1
                # report the unfiltered text of the matching line
                l = prelines[n]

                if ignore and re.search(ignore, l, re.MULTILINE):
                    if debug:
                        print("Skipping %s for %s:%s (ignore pattern)" % (
                            name, f, n))
                    continue
                bd = ""
                if blame:
                    bd = 'working directory'
                    # compare against None so that an empty annotate
                    # result is not fetched again for every match
                    if blamecache is None:
                        blamecache = getblame(f)
                    if n < len(blamecache):
                        bl, bu, br = blamecache[n]
                        if bl == l:
                            bd = '%s@%s' % (bu, br)

                errors.append((f, lineno and n + 1, l, msg, bd))
                result = False

        errors.sort()
        for e in errors:
            logfunc(*e)
            fc += 1
            if maxerr and fc >= maxerr:
                print(" (too many errors, giving up)")
                break

    return result
706 706
def main():
    """Command line entry point.

    Parses the options, determines the files to check (all entries of the
    current directory when no argument is given, a list read from stdin
    when the only argument is '-', the arguments themselves otherwise)
    and runs checkfile() on each of them.

    return 1 if checkfile() returned a false value for any file, else 0.
    """
    parser = optparse.OptionParser("%prog [options] [files | -]")
    parser.add_option("-w", "--warnings", action="store_true",
                      help="include warning-level checks")
    parser.add_option("-p", "--per-file", type="int",
                      help="max warnings per file")
    parser.add_option("-b", "--blame", action="store_true",
                      help="use annotate to generate blame info")
    parser.add_option("", "--debug", action="store_true",
                      help="show debug information")
    parser.add_option("", "--nolineno", action="store_false",
                      dest='lineno', help="don't show line numbers")

    parser.set_defaults(per_file=15, warnings=False, blame=False,
                        debug=False, lineno=True)
    options, args = parser.parse_args()

    if not args:
        targets = glob.glob("*")
    elif args == ['-']:
        # read file list from stdin
        targets = sys.stdin.read().splitlines()
    else:
        targets = args

    _preparepats()

    failed = False
    for path in targets:
        clean = checkfile(path, maxerr=options.per_file,
                          warnings=options.warnings, blame=options.blame,
                          debug=options.debug, lineno=options.lineno)
        if not clean:
            failed = True
    return 1 if failed else 0
741 741
742 742 if __name__ == "__main__":
743 743 sys.exit(main())
General Comments 0
You need to be logged in to leave comments. Login now