##// END OF EJS Templates
check-code: do not skip entire file, skip only one match instead...
Simon Heimberg -
r20238:81e90579 default
parent child Browse files
Show More
@@ -1,545 +1,545 b''
1 1 #!/usr/bin/env python
2 2 #
3 3 # check-code - a style and portability checker for Mercurial
4 4 #
5 5 # Copyright 2010 Matt Mackall <mpm@selenic.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 import re, glob, os, sys
11 11 import keyword
12 12 import optparse
13 13 try:
14 14 import re2
15 15 except ImportError:
16 16 re2 = None
17 17
def compilere(pat, multiline=False):
    """Compile pat, preferring re2 and falling back to the stdlib re.

    :pat: source of the regular expression
    :multiline: when true, the inline flag (?m) is prepended to pat
    """
    source = pat
    if multiline:
        source = '(?m)' + source
    if not re2:
        return re.compile(source)
    try:
        compiled = re2.compile(source)
    except re2.error:
        # re2 refused the pattern, let the stdlib engine compile it
        compiled = re.compile(source)
    return compiled
27 27
def repquote(m):
    """Mask the text of a quoted string while keeping its shape.

    m is a match object providing the named groups 'quote' and 'text'.
    In the text, letters become 'x', digits 'n', '.' becomes 'p' and
    ':' becomes 'q'; blanks and newlines are kept and any other
    character becomes 'o'. The result is wrapped in the original quote.
    """
    fromc = '.:'
    tochr = 'pq'
    def encodechr(i):
        if i > 255:
            return 'u'
        c = chr(i)
        if c in ' \n':
            return c
        if c.isalpha():
            return 'x'
        if c.isdigit():
            return 'n'
        # str.find() reports a miss as -1 instead of raising; indexing
        # tochr with it would silently pick 'q', so test it explicitly
        idx = fromc.find(c)
        if idx < 0:
            return 'o'
        return tochr[idx]
    t = m.group('text')
    # translation table covering every 8-bit character
    tt = ''.join(encodechr(i) for i in range(256))
    t = t.translate(tt)
    return m.group('quote') + t + m.group('quote')
49 49
def reppython(m):
    """Mask a Python comment or string literal.

    When the 'comment' group matched, its text is overwritten with '#'
    and only trailing whitespace is kept; otherwise the match is a
    quoted string and is handed to repquote().
    """
    comment = m.group('comment')
    if not comment:
        return repquote(m)
    visible = len(comment.rstrip())
    return visible * "#" + comment[visible:]
56 56
def repcomment(m):
    """Return group 1 followed by one '#' per character of group 2."""
    lead, comment = m.group(1), m.group(2)
    return lead + len(comment) * "#"
59 59
def repccomment(m):
    """Mask the body (group 2) of a C comment and close it with '*/'.

    Every non-blank character, and a blank directly following a
    newline, is replaced by 'x'; group 1 is kept in front.
    """
    opener, body = m.group(1), m.group(2)
    masked = re.sub(r"((?<=\n) )|\S", "x", body)
    return "%s%s*/" % (opener, masked)
63 63
def repcallspaces(m):
    """Strip the whitespace that follows each newline inside group 2."""
    head, rest = m.group(1), m.group(2)
    flattened = re.sub(r"\n\s+", "\n", rest)
    return head + flattened
67 67
def repinclude(m):
    """Replace what follows group 1 of an include match by '<foo>'."""
    return "%s<foo>" % m.group(1)
70 70
def rephere(m):
    """Mask a here-document: non-blank characters of group 2 become 'x'.

    Group 1 is returned unchanged in front of the masked text.
    """
    tag, body = m.group(1), m.group(2)
    return tag + re.sub(r"\S", "x", body)
74 74
75 75
# Checks for shell test scripts. The first list holds errors, the second
# one warnings; every entry is a (regex, message) pair.
testpats = [
  [
    (r'pushd|popd', "don't use 'pushd' or 'popd', use 'cd'"),
    (r'\W\$?\(\([^\)\n]*\)\)', "don't use (()) or $(()), use 'expr'"),
    (r'grep.*-q', "don't use 'grep -q', redirect to /dev/null"),
    (r'(?<!hg )grep.*-a', "don't use 'grep -a', use in-line python"),
    (r'sed.*-i', "don't use 'sed -i', use a temporary file"),
    (r'\becho\b.*\\n', "don't use 'echo \\n', use printf"),
    (r'echo -n', "don't use 'echo -n', use printf"),
    (r'(^| )wc[^|]*$\n(?!.*\(re\))', "filter wc output"),
    (r'head -c', "don't use 'head -c', use 'dd'"),
    (r'tail -n', "don't use the '-n' option to tail, just use '-<num>'"),
    (r'sha1sum', "don't use sha1sum, use $TESTDIR/md5sum.py"),
    (r'ls.*-\w*R', "don't use 'ls -R', use 'find'"),
    (r'printf.*[^\\]\\([1-9]|0\d)', "don't use 'printf \NNN', use Python"),
    (r'printf.*[^\\]\\x', "don't use printf \\x, use Python"),
    (r'\$\(.*\)', "don't use $(expr), use `expr`"),
    (r'rm -rf \*', "don't use naked rm -rf, target a directory"),
    (r'(^|\|\s*)grep (-\w\s+)*[^|]*[(|]\w',
     "use egrep for extended grep syntax"),
    (r'/bin/', "don't use explicit paths for tools"),
    (r'[^\n]\Z', "no trailing newline"),
    (r'export.*=', "don't export and assign at once"),
    (r'^source\b', "don't use 'source', use '.'"),
    (r'touch -d', "don't use 'touch -d', use 'touch -t' instead"),
    (r'ls +[^|\n-]+ +-', "options to 'ls' must come before filenames"),
    (r'[^>\n]>\s*\$HGRCPATH', "don't overwrite $HGRCPATH, append to it"),
    (r'^stop\(\)', "don't use 'stop' as a shell function name"),
    (r'(\[|\btest\b).*-e ', "don't use 'test -e', use 'test -f'"),
    (r'^alias\b.*=', "don't use alias, use a function"),
    (r'if\s*!', "don't use '!' to negate exit status"),
    (r'/dev/u?random', "don't use entropy, use /dev/zero"),
    (r'do\s*true;\s*done', "don't use true as loop body, use sleep 0"),
    (r'^( *)\t', "don't use tabs to indent"),
    (r'sed (-e )?\'(\d+|/[^/]*/)i(?!\\\n)',
     "put a backslash-escaped newline after sed 'i' command"),
  ],
  # warnings
  [
    (r'^function', "don't use 'function', use old style"),
    (r'^diff.*-\w*N', "don't use 'diff -N'"),
    (r'\$PWD|\${PWD}', "don't use $PWD, use `pwd`"),
    (r'^([^"\'\n]|("[^"\n]*")|(\'[^\'\n]*\'))*\^', "^ must be quoted"),
    (r'kill (`|\$\()', "don't use kill, use killdaemons.py")
  ]
]

# (regex, replacement function) pairs applied to a shell test script
# before the patterns above are searched: comments and here-documents
# are masked so that their content cannot trigger a check.
testfilters = [
    (r"( *)(#([^\n]*\S)?)", repcomment),
    (r"<<(\S+)((.|\n)*?\n\1)", rephere),
]
127 127
# Checks for unified tests (.t files): errors first, then warnings.
# An entry is (regex, message) with an optional third item, a regex
# that silences the check when it matches the reported line.
# NOTE(review): runs of blanks inside these literals may have been
# collapsed when this text was extracted - confirm against the
# repository copy before relying on the exact spacing.
winglobmsg = "use (glob) to match Windows paths too"
uprefix = r"^ \$ "
utestpats = [
  [
    (r'^(\S.*|| [$>] .*)[ \t]\n', "trailing whitespace on non-output"),
    (uprefix + r'.*\|\s*sed[^|>\n]*\n',
     "use regex test output patterns instead of sed"),
    (uprefix + r'(true|exit 0)', "explicit zero exit unnecessary"),
    (uprefix + r'.*(?<!\[)\$\?', "explicit exit code checks unnecessary"),
    (uprefix + r'.*\|\| echo.*(fail|error)',
     "explicit exit code checks unnecessary"),
    (uprefix + r'set -e', "don't use set -e"),
    (uprefix + r'(\s|fi\b|done\b)', "use > for continued lines"),
    (r'^ saved backup bundle to \$TESTTMP.*\.hg$', winglobmsg),
    (r'^ changeset .* references (corrupted|missing) \$TESTTMP/.*[^)]$',
     winglobmsg),
    (r'^ pulling from \$TESTTMP/.*[^)]$', winglobmsg,
     '\$TESTTMP/unix-repo$'), # in test-issue1802.t which skipped on windows
    (r'^ reverting .*/.*[^)]$', winglobmsg),
    (r'^ cloning subrepo \S+/.*[^)]$', winglobmsg),
    (r'^ pushing to \$TESTTMP/.*[^)]$', winglobmsg),
    (r'^ pushing subrepo \S+/\S+ to.*[^)]$', winglobmsg),
    (r'^ moving \S+/.*[^)]$', winglobmsg),
    (r'^ no changes made to subrepo since.*/.*[^)]$', winglobmsg),
    (r'^ .*: largefile \S+ not available from file:.*/.*[^)]$', winglobmsg),
  ],
  # warnings
  [
    (r'^ [^*?/\n]* \(glob\)$',
     "glob match with no glob character (?*/)"),
  ]
]

# Every shell script check also applies to the command lines of a
# unified test: re-anchor each regex behind the "$ " / "> " line prefix
# and append it to the matching (error or warning) list.
for i in [0, 1]:
    for p, m in testpats[i]:
        if p.startswith(r'^'):
            # an anchored pattern must start right after the prefix
            p = r"^ [$>] (%s)" % p[1:]
        else:
            p = r"^ [$>] .*(%s)" % p
        utestpats[i].append((p, m))

# (regex, replacement function) pairs masking here-documents and
# comments of a unified test before its patterns are searched
utestfilters = [
    (r"<<(\S+)((.|\n)*?\n > \1)", rephere),
    (r"( *)(#([^\n]*\S)?)", repcomment),
]
173 173
# Checks for Python sources: errors first, then warnings. An entry is
# (regex, message) with an optional third item, a regex that silences
# the check when it matches the reported line (for instance the
# 'no-py24' marker of the any/all/format check).
pypats = [
  [
    (r'^\s*def\s*\w+\s*\(.*,\s*\(',
     "tuple parameter unpacking not available in Python 3+"),
    (r'lambda\s*\(.*,.*\)',
     "tuple parameter unpacking not available in Python 3+"),
    (r'import (.+,[^.]+\.[^.]+|[^.]+\.[^.]+,)',
     '2to3 can\'t always rewrite "import qux, foo.bar", '
     'use "import foo.bar" on its own line instead.'),
    (r'(?<!def)\s+(cmp)\(', "cmp is not available in Python 3+"),
    (r'\breduce\s*\(.*', "reduce is not available in Python 3+"),
    (r'\.has_key\b', "dict.has_key is not available in Python 3+"),
    (r'\s<>\s', '<> operator is not available in Python 3+, use !='),
    (r'^\s*\t', "don't use tabs"),
    (r'\S;\s*\n', "semicolon"),
    (r'[^_]_\("[^"]+"\s*%', "don't use % inside _()"),
    (r"[^_]_\('[^']+'\s*%", "don't use % inside _()"),
    (r'(\w|\)),\w', "missing whitespace after ,"),
    (r'(\w|\))[+/*\-<>]\w', "missing whitespace in expression"),
    (r'^\s+(\w|\.)+=\w[^,()\n]*$', "missing whitespace in assignment"),
    (r'(\s+)try:\n((?:\n|\1\s.*\n)+?)\1except.*?:\n'
     r'((?:\n|\1\s.*\n)+?)\1finally:', 'no try/except/finally in Python 2.4'),
    (r'(?<!def)(\s+|^|\()next\(.+\)',
     'no next(foo) in Python 2.4 and 2.5, use foo.next() instead'),
    (r'(\s+)try:\n((?:\n|\1\s.*\n)*?)\1\s*yield\b.*?'
     r'((?:\n|\1\s.*\n)+?)\1finally:',
     'no yield inside try/finally in Python 2.4'),
    (r'.{81}', "line too long"),
    (r' x+[xo][\'"]\n\s+[\'"]x', 'string join across lines with no space'),
    (r'[^\n]\Z', "no trailing newline"),
    (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
#    (r'^\s+[^_ \n][^_. \n]+_[^_\n]+\s*=',
#     "don't use underbars in identifiers"),
    (r'^\s+(self\.)?[A-za-z][a-z0-9]+[A-Z]\w* = ',
     "don't use camelcase in identifiers"),
    (r'^\s*(if|while|def|class|except|try)\s[^[\n]*:\s*[^\\n]#\s]+',
     "linebreak after :"),
    (r'class\s[^( \n]+:', "old-style class, use class foo(object)"),
    (r'class\s[^( \n]+\(\):',
     "class foo() not available in Python 2.4, use class foo(object)"),
    (r'\b(%s)\(' % '|'.join(keyword.kwlist),
     "Python keyword is not a function"),
    (r',]', "unneeded trailing ',' in list"),
#    (r'class\s[A-Z][^\(]*\((?!Exception)',
#     "don't capitalize non-exception classes"),
#    (r'in range\(', "use xrange"),
#    (r'^\s*print\s+', "avoid using print in core and extensions"),
    (r'[\x80-\xff]', "non-ASCII character literal"),
    (r'("\')\.format\(', "str.format() not available in Python 2.4"),
    (r'^\s*with\s+', "with not available in Python 2.4"),
    (r'\.isdisjoint\(', "set.isdisjoint not available in Python 2.4"),
    (r'^\s*except.* as .*:', "except as not available in Python 2.4"),
    (r'^\s*os\.path\.relpath', "relpath not available in Python 2.4"),
    # lines carrying the no-py24 marker are exempt from this check
    (r'(?<!def)\s+(any|all|format)\(',
     "any/all/format not available in Python 2.4", 'no-py24'),
    (r'(?<!def)\s+(callable)\(',
     "callable not available in Python 3, use getattr(f, '__call__', None)"),
    (r'if\s.*\selse', "if ... else form not available in Python 2.4"),
    (r'^\s*(%s)\s\s' % '|'.join(keyword.kwlist),
     "gratuitous whitespace after Python keyword"),
    (r'([\(\[][ \t]\S)|(\S[ \t][\)\]])', "gratuitous whitespace in () or []"),
#    (r'\s\s=', "gratuitous whitespace before ="),
    (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\S',
     "missing whitespace around operator"),
    (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\s',
     "missing whitespace around operator"),
    (r'\s(\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\S',
     "missing whitespace around operator"),
    (r'[^^+=*/!<>&| %-](\s=|=\s)[^= ]',
     "wrong whitespace around ="),
    (r'\([^()]*( =[^=]|[^<>!=]= )',
     "no whitespace around = for named parameters"),
    (r'raise Exception', "don't raise generic exceptions"),
    (r'raise [^,(]+, (\([^\)]+\)|[^,\(\)]+)$',
     "don't use old-style two-argument raise, use Exception(message)"),
    (r' is\s+(not\s+)?["\'0-9-]', "object comparison with literal"),
    (r' [=!]=\s+(True|False|None)',
     "comparison with singleton, use 'is' or 'is not' instead"),
    (r'^\s*(while|if) [01]:',
     "use True/False for constant Boolean expression"),
    (r'(?:(?<!def)\s+|\()hasattr',
     'hasattr(foo, bar) is broken, use util.safehasattr(foo, bar) instead'),
    (r'opener\([^)]*\).read\(',
     "use opener.read() instead"),
    (r'BaseException', 'not in Python 2.4, use Exception'),
    (r'os\.path\.relpath', 'os.path.relpath is not in Python 2.5'),
    (r'opener\([^)]*\).write\(',
     "use opener.write() instead"),
    (r'[\s\(](open|file)\([^)]*\)\.read\(',
     "use util.readfile() instead"),
    (r'[\s\(](open|file)\([^)]*\)\.write\(',
     "use util.writefile() instead"),
    (r'^[\s\(]*(open(er)?|file)\([^)]*\)',
     "always assign an opened file to a variable, and close it afterwards"),
    (r'[\s\(](open|file)\([^)]*\)\.',
     "always assign an opened file to a variable, and close it afterwards"),
    (r'(?i)descendent', "the proper spelling is descendAnt"),
    (r'\.debug\(\_', "don't mark debug messages for translation"),
    (r'\.strip\(\)\.split\(\)', "no need to strip before splitting"),
    (r'^\s*except\s*:', "naked except clause", r'#.*re-raises'),
    (r':\n( )*( ){1,3}[^ ]', "must indent 4 spaces"),
    (r'ui\.(status|progress|write|note|warn)\([\'\"]x',
     "missing _() in ui message (use () to hide false-positives)"),
    (r'release\(.*wlock, .*lock\)', "wrong lock release order"),
  ],
  # warnings
  [
    (r'(^| )pp +xxxxqq[ \n][^\n]', "add two newlines after '.. note::'"),
  ]
]
284 284
# Filter masking Python comments and string literals through reppython.
# The verbose regex matches either a comment (group 'comment') or a
# quoted string, exposing its delimiter as 'quote' and its body as
# 'text'.
pyfilters = [
    (r"""(?msx)(?P<comment>\#.*?$)|
         ((?P<quote>('''|\"\"\"|(?<!')'(?!')|(?<!")"(?!")))
          (?P<text>(([^\\]|\\.)*?))
          (?P=quote))""", reppython),
]

# plain text files are checked unfiltered
txtfilters = []

# Checks for text files: errors first, then (no) warnings
txtpats = [
  [
    ('\s$', 'trailing whitespace'),
  ],
  []
]
300 300
# Checks for C sources and headers: errors first, then (no) warnings.
# Every entry is a (regex, message) pair.
# NOTE(review): runs of blanks inside these literals may have been
# collapsed when this text was extracted - confirm against the
# repository copy before relying on the exact spacing.
cpats = [
  [
    (r'//', "don't use //-style comments"),
    (r'^ ', "don't use spaces to indent"),
    (r'\S\t', "don't use tabs except for indent"),
    (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
    (r'.{81}', "line too long"),
    (r'(while|if|do|for)\(', "use space after while/if/do/for"),
    (r'return\(', "return is not a function"),
    (r' ;', "no space before ;"),
    (r'[)][{]', "space between ) and {"),
    (r'\w+\* \w+', "use int *foo, not int* foo"),
    (r'\W\([^\)]+\) \w+', "use (int)foo, not (int) foo"),
    (r'\w+ (\+\+|--)', "use foo++, not foo ++"),
    (r'\w,\w', "missing whitespace after ,"),
    (r'^[^#]\w[+/*]\w', "missing whitespace in expression"),
    (r'^#\s+\w', "use #foo, not # foo"),
    (r'[^\n]\Z', "no trailing newline"),
    (r'^\s*#import\b', "use only #include in standard C code"),
  ],
  # warnings
  []
]

# (regex, replacement function) pairs applied to C code before the
# patterns above are searched: they rewrite comments, string literals,
# include targets and parenthesized text.
cfilters = [
    (r'(/\*)(((\*(?!/))|[^*])*)\*/', repccomment),
    (r'''(?P<quote>(?<!")")(?P<text>([^"]|\\")+)"(?!")''', repquote),
    (r'''(#\s*include\s+<)([^>]+)>''', repinclude),
    (r'(\()([^)]+\))', repcallspaces),
]
331 331
# Layering check for mercurial/util.py: errors first, then warnings
inutilpats = [
  [
    (r'\bui\.', "don't use ui in util"),
  ],
  # warnings
  []
]

# Layering check for mercurial/revlog.py: errors first, then warnings
inrevlogpats = [
  [
    (r'\brepo\.', "don't use repo in revlog"),
  ],
  # warnings
  []
]

# All known checks as (name, filename regex, filters, patterns). A file
# is examined by every check whose regex matches its name, so one file
# can be covered by several entries.
checks = [
    ('python', r'.*\.(py|cgi)$', pyfilters, pypats),
    ('test script', r'(.*/)?test-[^.~]*$', testfilters, testpats),
    ('c', r'.*\.[ch]$', cfilters, cpats),
    ('unified test', r'.*\.t$', utestfilters, utestpats),
    ('layering violation repo in revlog', r'mercurial/revlog\.py', pyfilters,
     inrevlogpats),
    ('layering violation ui in util', r'mercurial/util\.py', pyfilters,
     inutilpats),
    ('txt', r'.*\.txt$', txtfilters, txtpats),
]
359 359
def _preparepats():
    """Compile, in place, the patterns and filters of every check.

    Pattern sources are first adapted to multi-line searches and then
    compiled with re.MULTILINE; filter sources are compiled unchanged.
    """
    def multiline(source):
        # \s doesn't match \n
        source = re.sub(r'(?<!\\)\\s', r'[ \\t]', source)
        # [^...] doesn't match newline
        return re.sub(r'(?<!\\)\[\^', r'[^\\n', source)

    for check in checks:
        # the last item of a check holds its error and warning lists
        for pats in check[-1]:
            pats[:] = [(re.compile(multiline(pseq[0]), re.MULTILINE),)
                       + pseq[1:] for pseq in pats]
        filters = check[2]
        filters[:] = [(re.compile(flt[0]), flt[1]) for flt in filters]
_preparepats()
377 377
378 378 class norepeatlogger(object):
379 379 def __init__(self):
380 380 self._lastseen = None
381 381
382 382 def log(self, fname, lineno, line, msg, blame):
383 383 """print error related a to given line of a given file.
384 384
385 385 The faulty line will also be printed but only once in the case
386 386 of multiple errors.
387 387
388 388 :fname: filename
389 389 :lineno: line number
390 390 :line: actual content of the line
391 391 :msg: error message
392 392 """
393 393 msgid = fname, lineno, line
394 394 if msgid != self._lastseen:
395 395 if blame:
396 396 print "%s:%d (%s):" % (fname, lineno, blame)
397 397 else:
398 398 print "%s:%d:" % (fname, lineno)
399 399 print " > %s" % line
400 400 self._lastseen = msgid
401 401 print " " + msg
402 402
403 403 _defaultlogger = norepeatlogger()
404 404
def getblame(f):
    """Return a (line, user, revision) tuple per line of file f.

    The data is read from the output of 'hg annotate -un'.
    """
    annotated = []
    for entry in os.popen('hg annotate -un %s' % f):
        prefix, text = entry.split(':', 1)
        user, rev = prefix.split()
        # drop the blank following ':' and the final newline
        content = text[1:-1]
        annotated.append((content, user, rev))
    return annotated
412 412
413 413 def checkfile(f, logfunc=_defaultlogger.log, maxerr=None, warnings=False,
414 414 blame=False, debug=False, lineno=True):
415 415 """checks style and portability of a given file
416 416
417 417 :f: filepath
418 418 :logfunc: function used to report error
419 419 logfunc(filename, linenumber, linecontent, errormessage)
420 420 :maxerr: number of error to display before aborting.
421 421 Set to false (default) to report all errors
422 422
423 423 return True if no error is found, False otherwise.
424 424 """
425 425 blamecache = None
426 426 result = True
427 427 for name, match, filters, pats in checks:
428 428 if debug:
429 429 print name, f
430 430 fc = 0
431 431 if not re.match(match, f):
432 432 if debug:
433 433 print "Skipping %s for %s it doesn't match %s" % (
434 434 name, match, f)
435 435 continue
436 436 try:
437 437 fp = open(f)
438 438 except IOError, e:
439 439 print "Skipping %s, %s" % (f, str(e).split(':', 1)[0])
440 440 continue
441 441 pre = post = fp.read()
442 442 fp.close()
443 443 if "no-" "check-code" in pre:
444 444 if debug:
445 445 print "Skipping %s for %s it has no-" "check-code" % (
446 446 name, f)
447 447 break
448 448 for p, r in filters:
449 449 post = re.sub(p, r, post)
450 450 nerrs = len(pats[0]) # nerr elements are errors
451 451 if warnings:
452 452 pats = pats[0] + pats[1]
453 453 else:
454 454 pats = pats[0]
455 455 # print post # uncomment to show filtered version
456 456
457 457 if debug:
458 458 print "Checking %s for %s" % (name, f)
459 459
460 460 prelines = None
461 461 errors = []
462 462 for i, pat in enumerate(pats):
463 463 if len(pat) == 3:
464 464 p, msg, ignore = pat
465 465 else:
466 466 p, msg = pat
467 467 ignore = None
468 468 if i >= nerrs:
469 469 msg = "warning: " + msg
470 470
471 471 pos = 0
472 472 n = 0
473 473 for m in p.finditer(post):
474 474 if prelines is None:
475 475 prelines = pre.splitlines()
476 476 postlines = post.splitlines(True)
477 477
478 478 start = m.start()
479 479 while n < len(postlines):
480 480 step = len(postlines[n])
481 481 if pos + step > start:
482 482 break
483 483 pos += step
484 484 n += 1
485 485 l = prelines[n]
486 486
487 487 if "check-code" "-ignore" in l:
488 488 if debug:
489 489 print "Skipping %s for %s:%s (check-code" "-ignore)" % (
490 490 name, f, n)
491 491 continue
492 492 elif ignore and re.search(ignore, l, re.MULTILINE):
493 493 continue
494 494 bd = ""
495 495 if blame:
496 496 bd = 'working directory'
497 497 if not blamecache:
498 498 blamecache = getblame(f)
499 499 if n < len(blamecache):
500 500 bl, bu, br = blamecache[n]
501 501 if bl == l:
502 502 bd = '%s@%s' % (bu, br)
503 503
504 504 errors.append((f, lineno and n + 1, l, msg, bd))
505 505 result = False
506 506
507 507 errors.sort()
508 508 for e in errors:
509 509 logfunc(*e)
510 510 fc += 1
511 511 if maxerr and fc >= maxerr:
512 512 print " (too many errors, giving up)"
513 513 break
514 514
515 515 return result
516 516
if __name__ == "__main__":
    # command line front end: check the given files, or every entry of
    # the current directory when none is given
    parser = optparse.OptionParser("%prog [options] [files]")
    parser.set_defaults(per_file=15, warnings=False, blame=False, debug=False,
                        lineno=True)
    parser.add_option("-w", "--warnings", action="store_true",
                      help="include warning-level checks")
    parser.add_option("-p", "--per-file", type="int",
                      help="max warnings per file")
    parser.add_option("-b", "--blame", action="store_true",
                      help="use annotate to generate blame info")
    parser.add_option("", "--debug", action="store_true",
                      help="show debug information")
    parser.add_option("", "--nolineno", action="store_false",
                      dest='lineno', help="don't show line numbers")
    (options, args) = parser.parse_args()

    check = args
    if not check:
        check = glob.glob("*")

    # exit status is 1 as soon as one file fails its checks
    ret = 0
    for f in check:
        clean = checkfile(f, maxerr=options.per_file,
                          warnings=options.warnings, blame=options.blame,
                          debug=options.debug, lineno=options.lineno)
        if not clean:
            ret = 1
    sys.exit(ret)
@@ -1,244 +1,244 b''
1 1 import ast
2 2 import os
3 3 import sys
4 4
5 5 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
6 6 # to work when run from a virtualenv. The modules were chosen empirically
7 7 # so that the return value matches the return value without virtualenv.
8 8 import BaseHTTPServer
9 9 import zlib
10 10
def dotted_name_of_path(path):
    """Turn the relative path of a source file into a dotted module name.

    The path is split on '/', and the last three characters of its
    final component (the '.py' suffix) are dropped.

    >>> dotted_name_of_path('mercurial/error.py')
    'mercurial.error'
    """
    pieces = path.split('/')
    basename = pieces.pop()
    pieces.append(basename[:-3]) # remove .py
    return '.'.join(pieces)
21 21
22 22
def list_stdlib_modules():
    """List the modules present in the stdlib.

    This is a generator: builtin modules and a few special cases come
    first, then one dotted name per .py or .so file found below the
    sys.path entries that belong to the interpreter installation.

    >>> mods = set(list_stdlib_modules())
    >>> 'BaseHTTPServer' in mods
    True

    os.path isn't really a module, so it's missing:

    >>> 'os.path' in mods
    False

    sys requires special treatment, because it's baked into the
    interpreter, but it should still appear:

    >>> 'sys' in mods
    True

    >>> 'collections' in mods
    True

    >>> 'cStringIO' in mods
    True
    """
    for m in sys.builtin_module_names:
        yield m
    # These modules only exist on windows, but we should always
    # consider them stdlib.
    for m in ['msvcrt', '_winreg']:
        yield m
    # These get missed too
    for m in 'ctypes', 'email':
        yield m
    yield 'builtins' # python3 only
    stdlib_prefixes = set([sys.prefix, sys.exec_prefix])
    # We need to supplement the list of prefixes for the search to work
    # when run from within a virtualenv.
    for mod in (BaseHTTPServer, zlib):
        try:
            # Not all module objects have a __file__ attribute.
            filename = mod.__file__
        except AttributeError:
            continue
        dirname = os.path.dirname(filename)
        for prefix in stdlib_prefixes:
            if dirname.startswith(prefix):
                # Then this directory is redundant.
                break
        else:
            stdlib_prefixes.add(dirname)
    for libpath in sys.path:
        # We want to walk everything in sys.path that starts with
        # something in stdlib_prefixes. The any() warning of the style
        # checker is silenced for this single line (no-py24 marker)
        # because the ast module used by this script implies the
        # availability of any(). Only this line is exempt, the rest of
        # the file is still checked.
        if not any(libpath.startswith(p) for p in stdlib_prefixes): # no-py24
            continue
        if 'site-packages' in libpath:
            continue
        for top, dirs, files in os.walk(libpath):
            for name in files:
                if name == '__init__.py':
                    continue
                if not (name.endswith('.py') or name.endswith('.so')):
                    continue
                full_path = os.path.join(top, name)
                if 'site-packages' in full_path:
                    continue
                # path relative to libpath, without the separator
                rel_path = full_path[len(libpath) + 1:]
                mod = dotted_name_of_path(rel_path)
                yield mod

stdlib_modules = set(list_stdlib_modules())
96 96
def imported_modules(source, ignore_nested=False):
    """Given the source of a file as a string, yield the names
    imported by that file.

    Args:
      source: The python source to examine as a string.
      ignore_nested: If true, import statements that do not start in
                     column zero will be ignored.

    Returns:
      A generator of the module names imported by the given source.
      "import a.b" yields 'a.b', "from a import b" yields 'a.b' and
      "from . import b" yields 'b'.

    >>> sorted(imported_modules(
    ...         'import foo ; from baz import bar; import foo.qux'))
    ['baz.bar', 'foo', 'foo.qux']
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', ignore_nested=True))
    ['foo']
    >>> sorted(imported_modules('from . import foo'))
    ['foo']
    """
    for node in ast.walk(ast.parse(source)):
        if ignore_nested and getattr(node, 'col_offset', 0) > 0:
            continue
        if isinstance(node, ast.Import):
            for n in node.names:
                yield n.name
        elif isinstance(node, ast.ImportFrom):
            # "from . import x" has no module part: node.module is None
            prefix = ''
            if node.module:
                prefix = node.module + '.'
            for n in node.names:
                yield prefix + n.name
129 129
def verify_stdlib_on_own_line(source):
    """Given some python source, verify that stdlib imports are done
    in separate statements from relative local module imports.

    One message is yielded per import statement naming both stdlib
    modules and other modules.

    Observing this limitation is important as it works around an
    annoying lib2to3 bug in relative import rewrites:
    http://bugs.python.org/issue19510.

    >>> list(verify_stdlib_on_own_line('import sys, foo'))
    ['mixed stdlib and relative imports:\\n foo, sys']
    >>> list(verify_stdlib_on_own_line('import sys, os'))
    []
    >>> list(verify_stdlib_on_own_line('import foo, bar'))
    []
    """
    for node in ast.walk(ast.parse(source)):
        if not isinstance(node, ast.Import):
            continue
        names = set(n.name for n in node.names)
        std = [name for name in names if name in stdlib_modules]
        # fine when all names, or none of them, come from the stdlib
        if std and len(std) != len(names):
            yield ('mixed stdlib and relative imports:\n %s' %
                   ', '.join(sorted(names)))
154 154
class CircularImport(Exception):
    """Raised with the list of module names forming an import cycle."""
157 157
158 158
def cyclekey(names):
    """Return the sorted tuple of the distinct entries of names."""
    unique = set(names)
    return tuple(sorted(unique))
161 161
def check_one_mod(mod, imports, path=None, ignore=None):
    """Follow the imports of mod depth-first, looking for a cycle.

    :mod: dotted name of the module to start from
    :imports: dict mapping a module name to the names it imports
    :path: modules visited on the way down to mod
    :ignore: container of cyclekey() values of cycles not to report

    Raises CircularImport with the cycle as argument when a cycle that
    is not listed in ignore is found.
    """
    if path is None:
        path = []
    if ignore is None:
        ignore = []
    trail = path + [mod]
    package = mod.rsplit('.', 1)[0]
    for name in sorted(imports.get(mod, [])):
        if name in stdlib_modules:
            target = name
        else:
            # other names are taken relative to the package of mod
            target = package + '.' + name
        if target not in trail:
            check_one_mod(target, imports, path=trail, ignore=ignore)
            continue
        cycle = trail[trail.index(target):] + [target]
        if cyclekey(cycle) not in ignore:
            raise CircularImport(cycle)
178 178
def rotatecycle(cycle):
    """arrange a cycle so that the lexicographically first module listed first

    cycle is a closed path, its last entry repeats the first one; the
    result is closed in the same way.

    >>> rotatecycle(['foo', 'bar', 'foo'])
    ['bar', 'foo', 'bar']
    >>> rotatecycle(['bar', 'foo', 'bar'])
    ['bar', 'foo', 'bar']
    """
    lowest = min(cycle)
    idx = cycle.index(lowest)
    # Drop the closing duplicate before rotating. Slicing the closed
    # list directly repeated the first module twice at the end whenever
    # the cycle already started with its lowest name (idx == 0).
    return cycle[idx:-1] + cycle[:idx] + [lowest]

def find_cycles(imports):
    """Find cycles in an already-loaded import graph.

    imports maps a module name to the names it imports. Each cycle is
    returned once, as a string of module names joined by ' -> '.

    >>> imports = {'top.foo': ['bar', 'os.path', 'qux'],
    ...            'top.bar': ['baz', 'sys'],
    ...            'top.baz': ['foo'],
    ...            'top.qux': ['foo']}
    >>> print '\\n'.join(sorted(find_cycles(imports)))
    top.bar -> top.baz -> top.foo -> top.bar
    top.foo -> top.qux -> top.foo
    """
    cycles = {}
    for mod in sorted(imports):
        try:
            # cycles found so far are passed in to be skipped
            check_one_mod(mod, imports, ignore=cycles)
        except CircularImport:
            # sys.exc_info() fetches the exception without relying on
            # a version specific spelling of the except clause
            cycle = sys.exc_info()[1].args[0]
            cycles[cyclekey(cycle)] = ' -> '.join(rotatecycle(cycle))
    return cycles.values()
208 208
209 209 def _cycle_sortkey(c):
210 210 return len(c), c
211 211
212 212 def main(argv):
213 213 if len(argv) < 2:
214 214 print 'Usage: %s file [file] [file] ...'
215 215 return 1
216 216 used_imports = {}
217 217 any_errors = False
218 218 for source_path in argv[1:]:
219 219 f = open(source_path)
220 220 modname = dotted_name_of_path(source_path)
221 221 src = f.read()
222 222 used_imports[modname] = sorted(
223 223 imported_modules(src, ignore_nested=True))
224 224 for error in verify_stdlib_on_own_line(src):
225 225 any_errors = True
226 226 print source_path, error
227 227 f.close()
228 228 cycles = find_cycles(used_imports)
229 229 if cycles:
230 230 firstmods = set()
231 231 for c in sorted(cycles, key=_cycle_sortkey):
232 232 first = c.split()[0]
233 233 # As a rough cut, ignore any cycle that starts with the
234 234 # same module as some other cycle. Otherwise we see lots
235 235 # of cycles that are effectively duplicates.
236 236 if first in firstmods:
237 237 continue
238 238 print 'Import cycle:', c
239 239 firstmods.add(first)
240 240 any_errors = True
241 241 return not any_errors
242 242
243 243 if __name__ == '__main__':
244 244 sys.exit(int(main(sys.argv)))
General Comments 0
You need to be logged in to leave comments. Login now