##// END OF EJS Templates
import-checker: track SyntaxErrors...
timeless -
r28920:cdf331b5 default
parent child Browse files
Show More
@@ -1,612 +1,617
1 1 #!/usr/bin/env python
2 2
3 3 from __future__ import absolute_import, print_function
4 4
5 5 import ast
6 6 import collections
7 7 import os
8 8 import sys
9 9
10 10 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
11 11 # to work when run from a virtualenv. The modules were chosen empirically
12 12 # so that the return value matches the return value without virtualenv.
13 13 import BaseHTTPServer
14 14 import zlib
15 15
# Modules from which individual symbols may be imported directly
# ("from X import name") without being reported by the checker.
allowsymbolimports = (
    '__future__',
    'mercurial.hgweb.common',
    'mercurial.hgweb.request',
    'mercurial.i18n',
    'mercurial.node',
)

# Module names that are easily confused with ordinary variable names.
# Each key must be imported under the alias given as its value.
requirealias = {'ui': 'uimod'}
30 30
def usingabsolute(root):
    """Report whether the module rooted at ``root`` uses absolute imports.

    On Python 3 the answer is always True. Otherwise the tree is searched
    for a ``from __future__ import absolute_import`` statement.
    """
    if sys.version_info[0] >= 3:
        return True

    return any(
        alias.name == 'absolute_import'
        for stmt in ast.walk(root)
        if isinstance(stmt, ast.ImportFrom) and stmt.module == '__future__'
        for alias in stmt.names)
44 44
def walklocal(root):
    """Yield ``(node, newscope)`` for root and its same-scope descendants.

    Nodes are produced breadth-first. A function definition below ``root``
    is yielded with ``newscope`` set to True and is not descended into;
    ``root`` itself is always yielded first with ``newscope`` False.
    """
    pending = collections.deque(ast.iter_child_nodes(root))
    yield root, False
    while pending:
        current = pending.popleft()
        opensscope = isinstance(current, ast.FunctionDef)
        if not opensscope:
            # Only keep descending while we stay in the same scope.
            pending.extend(ast.iter_child_nodes(current))
        yield current, opensscope
55 55
def dotted_name_of_path(path, trimpure=False):
    """Convert a relative source file path into a dotted module name.

    Everything after the first dot of the file name is dropped (so .py,
    .so and .ARCH.so all disappear), and a trailing "module" on the file
    name is removed. With ``trimpure`` set, path components named "pure"
    are left out of the result.

    >>> dotted_name_of_path('mercurial/error.py')
    'mercurial.error'
    >>> dotted_name_of_path('mercurial/pure/parsers.py', trimpure=True)
    'mercurial.parsers'
    >>> dotted_name_of_path('zlibmodule.so')
    'zlib'
    """
    components = path.replace(os.sep, '/').split('/')
    basename = components.pop().split('.', 1)[0]
    if basename.endswith('module'):
        basename = basename[:-len('module')]
    components.append(basename)
    if trimpure:
        components = [c for c in components if c != 'pure']
    return '.'.join(components)
73 73
def fromlocalfunc(modulename, localmods):
    """Build a resolver mapping an imported name to a locally defined module.

    `modulename` is the `dotted_name_of_path()`-ed path of the source file
    being examined; it may end with `.__init__`.

    `localmods` is a dict (or set) keyed by the absolute
    `dotted_name_of_path()`-ed paths of locally defined (= Mercurial
    specific) modules. Names that are not found in `localmods` are assumed
    to come from the Python standard library.

    The returned function takes a `name` (without `.__init__`) and an
    optional relative import `level`. It returns False when `name` does not
    match a local module, and otherwise a tuple
    `(absname, dottedpath, hassubmod)`:

    `absname` is the absolute module name (e.g. "hgext.convert"), usable as
    a prefix when composing sub module names.

    `dottedpath` is the `dotted_name_of_path()`-ed source path (e.g.
    "hgext.convert.__init__"), usable to look the module up in `localmods`
    again.

    `hassubmod` tells whether the module may have sub modules (provided for
    convenience; it is equivalent to "absname != dottedpath").

    >>> localmods = {'foo.__init__': True, 'foo.foo1': True,
    ...              'foo.bar.__init__': True, 'foo.bar.bar1': True,
    ...              'baz.__init__': True, 'baz.baz1': True }
    >>> fromlocal = fromlocalfunc('foo.xxx', localmods)
    >>> # relative
    >>> fromlocal('foo1')
    ('foo.foo1', 'foo.foo1', False)
    >>> fromlocal('bar')
    ('foo.bar', 'foo.bar.__init__', True)
    >>> fromlocal('bar.bar1')
    ('foo.bar.bar1', 'foo.bar.bar1', False)
    >>> # absolute
    >>> fromlocal('baz')
    ('baz', 'baz.__init__', True)
    >>> fromlocal('baz.baz1')
    ('baz.baz1', 'baz.baz1', False)
    >>> # unknown = maybe standard library
    >>> fromlocal('os')
    False
    >>> fromlocal(None, 1)
    ('foo', 'foo.__init__', True)
    >>> fromlocal2 = fromlocalfunc('foo.xxx.yyy', localmods)
    >>> fromlocal2(None, 2)
    ('foo', 'foo.__init__', True)
    """
    package = modulename.rpartition('.')[0]
    prefix = package + '.' if package else ''

    def fromlocal(name, level=0):
        if name is None:
            # "from . import x": there is no module name, so the target is
            # found by stripping `level` trailing components.
            assert level > 0
            candidates = ['.'.join(modulename.split('.')[:-level])]
        else:
            # The sibling (relative) interpretation wins over the absolute.
            candidates = [prefix + name, name]

        for candidate in candidates:
            if candidate in localmods:
                return (candidate, candidate, False)
            initpath = candidate + '.__init__'
            if initpath in localmods:
                return (candidate, initpath, True)
        return False

    return fromlocal
153 153
def list_stdlib_modules():
    """Yield the names of the modules that make up the stdlib.

    >>> mods = set(list_stdlib_modules())
    >>> 'BaseHTTPServer' in mods
    True

    os.path isn't really a module, so it's missing:

    >>> 'os.path' in mods
    False

    sys requires special treatment, because it's baked into the
    interpreter, but it should still appear:

    >>> 'sys' in mods
    True

    >>> 'collections' in mods
    True

    >>> 'cStringIO' in mods
    True
    """
    for builtin in sys.builtin_module_names:
        yield builtin
    # Windows-only modules; always treated as stdlib regardless of platform.
    for winonly in ['msvcrt', '_winreg']:
        yield winonly
    yield 'builtins' # python3 only
    for unixonly in 'fcntl', 'grp', 'pwd', 'termios': # Unix only
        yield unixonly
    for pypymod in 'cPickle', 'datetime': # in Python (not C) on PyPy
        yield pypymod

    stdlib_prefixes = set([sys.prefix, sys.exec_prefix])
    # Inside a virtualenv the two prefixes above are not enough, so the
    # directories of a few known stdlib modules are added when needed.
    for knownmod in (BaseHTTPServer, zlib):
        try:
            # Not all module objects have a __file__ attribute.
            filename = knownmod.__file__
        except AttributeError:
            continue
        dirname = os.path.dirname(filename)
        # A directory already below a known prefix would be redundant.
        if not any(dirname.startswith(known) for known in stdlib_prefixes):
            stdlib_prefixes.add(dirname)

    suffixes = ('.py', '.so', '.pyc', '.pyo', '.pyd')
    for libpath in sys.path:
        # Only walk sys.path entries that start with a stdlib prefix.
        if not any(libpath.startswith(p) for p in stdlib_prefixes):
            continue
        for top, dirs, files in os.walk(libpath):
            # Prune in place so os.walk() skips directories that are not
            # packages, as well as top-level hgext/mercurial directories.
            dirs[:] = [
                d for d in dirs
                if os.path.exists(os.path.join(top, d, '__init__.py'))
                and not (top == libpath and d in ('hgext', 'mercurial'))]
            for name in files:
                if not name.endswith(suffixes):
                    continue
                if name.startswith('__init__.py'):
                    # A package is named after its directory.
                    full_path = top
                else:
                    full_path = os.path.join(top, name)
                rel_path = full_path[len(libpath) + 1:]
                yield dotted_name_of_path(rel_path)

stdlib_modules = set(list_stdlib_modules())
227 227
def imported_modules(source, modulename, localmods, ignore_nested=False):
    """Yield the locally defined modules imported by a piece of source.

    Args:
      source: The python source to examine as a string.
      modulename: of specified python source (may have `__init__`)
      localmods: dict of locally defined module names (may have `__init__`)
      ignore_nested: If true, import statements that do not start in
                     column zero will be ignored.

    Yields:
      The `dotted_name_of_path()`-ed names of the local modules imported
      by the given source. Imports that do not resolve to a local module
      are skipped.

    >>> modulename = 'foo.xxx'
    >>> localmods = {'foo.__init__': True,
    ...              'foo.foo1': True, 'foo.foo2': True,
    ...              'foo.bar.__init__': True, 'foo.bar.bar1': True,
    ...              'baz.__init__': True, 'baz.baz1': True }
    >>> # standard library (= not locally defined ones)
    >>> sorted(imported_modules(
    ...        'from stdlib1 import foo, bar; import stdlib2',
    ...        modulename, localmods))
    []
    >>> # relative importing
    >>> sorted(imported_modules(
    ...        'import foo1; from bar import bar1',
    ...        modulename, localmods))
    ['foo.bar.bar1', 'foo.foo1']
    >>> sorted(imported_modules(
    ...        'from bar.bar1 import name1, name2, name3',
    ...        modulename, localmods))
    ['foo.bar.bar1']
    >>> # absolute importing
    >>> sorted(imported_modules(
    ...        'from baz import baz1, name1',
    ...        modulename, localmods))
    ['baz.__init__', 'baz.baz1']
    >>> # mixed importing, even though it shouldn't be recommended
    >>> sorted(imported_modules(
    ...        'import stdlib, foo1, baz',
    ...        modulename, localmods))
    ['baz.__init__', 'foo.foo1']
    >>> # ignore_nested
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', modulename, localmods))
    ['foo.__init__', 'foo.bar.__init__']
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', modulename, localmods, ignore_nested=True))
    ['foo.__init__']
    """
    fromlocal = fromlocalfunc(modulename, localmods)
    for node in ast.walk(ast.parse(source)):
        if ignore_nested and getattr(node, 'col_offset', 0) > 0:
            continue

        if isinstance(node, ast.Import):
            for alias in node.names:
                resolved = fromlocal(alias.name)
                # An unresolved name should be a standard library import.
                if resolved:
                    yield resolved[1]
            continue

        if not isinstance(node, ast.ImportFrom):
            continue

        resolved = fromlocal(node.module, node.level)
        if not resolved:
            # this should import standard library
            continue

        absname, dottedpath, hassubmod = resolved
        if not hassubmod:
            # "dottedpath" is a plain module, so the imported names can
            # only be its attributes; there is no need to examine them.
            # e.g.: from mercurial.node import nullid, nullrev
            yield dottedpath
            continue

        # "dottedpath" is a package: each imported name is either one of
        # its sub modules or something defined by the package itself.
        needpackage = False
        for alias in node.names:
            submodule = fromlocal(absname + '.' + alias.name)
            if submodule:
                yield submodule[1]
            else:
                needpackage = True
        if needpackage:
            # At least one name was looked up on the package itself.
            yield dottedpath
323 323
def verify_import_convention(module, source, localmods):
    """Check the imports in ``source`` against our coding convention.

    There are two conventions, legacy and modern. A file that uses
    absolute imports is held to the much more thorough modern convention;
    any other file is only checked for mixed imports (legacy convention).

    Returns an iterable of ``(message, lineno)`` pairs.
    """
    root = ast.parse(source)
    if usingabsolute(root):
        return verify_modern_convention(module, root, localmods)
    return verify_stdlib_on_own_line(root)
340 340
def verify_modern_convention(module, root, localmods, root_col_offset=0):
    """Yield ``(message, lineno)`` for each modern-convention violation.

    The rules of the modern convention are:

    * Ordering is stdlib followed by local imports. Each group is lexically
      sorted.
    * Importing multiple modules via "import X, Y" is not allowed: use
      separate import statements.
    * Importing multiple modules via "from X import ..." is allowed if using
      parenthesis and one entry per line.
    * Only 1 relative import statement per import level ("from .", "from ..")
      is allowed.
    * Relative imports from higher levels must occur before lower levels. e.g.
      "from .." must be before "from .".
    * Imports from peer packages should use relative import (e.g. do not
      "import mercurial.foo" from a "mercurial.*" module).
    * Symbols can only be imported from specific modules (see
      `allowsymbolimports`). For other modules, first import the module then
      assign the symbol to a module-level variable. In addition, these imports
      must be performed before other relative imports. This rule only
      applies to import statements outside of any blocks.
    * Relative imports from the standard library are not allowed.
    * Certain modules must be aliased to alternate names to avoid aliasing
      and readability problems. See `requirealias`.

    ``root_col_offset`` is the column at which statements directly inside
    ``root`` start; several rules apply only to imports at that column.
    """
    topmodule = module.split('.')[0]
    fromlocal = fromlocalfunc(module, localmods)

    # Name of the most recent local/non-stdlib import, if any.
    seenlocal = None
    # Set once a relative, non-symbol import ("from . import x") is seen.
    seennonsymbolrelative = False
    # Previous "import X" name, for the sorting check.
    lastname = None
    # Relative import levels encountered so far.
    seenlevels = set()

    for node, newscope in walklocal(root):
        def msg(fmt, *args):
            return (fmt % args, node.lineno)

        if newscope:
            # A nested function gets its own independent check.
            nested = verify_modern_convention(module, node, localmods,
                                              node.col_offset + 4)
            for problem in nested:
                yield problem
            continue

        if isinstance(node, ast.Import):
            attoplevel = node.col_offset == root_col_offset

            # "import foo, bar" must be split into separate statements.
            if len(node.names) > 1:
                yield msg('multiple imported names: %s',
                          ', '.join(n.name for n in node.names))

            first = node.names[0]
            name = first.name
            asname = first.asname

            # Sorting is not enforced for imports inside blocks.
            if attoplevel and lastname and name < lastname:
                yield msg('imports not lexically sorted: %s < %s',
                          name, lastname)

            lastname = name

            # stdlib imports should be before local imports.
            stdlib = name in stdlib_modules
            if stdlib and seenlocal and attoplevel:
                yield msg('stdlib import "%s" follows local import: %s',
                          name, seenlocal)

            if not stdlib:
                seenlocal = name

            # Import of sibling modules should use relative imports.
            if name.split('.')[0] == topmodule:
                yield msg('import should be relative: %s', name)

            if name in requirealias and asname != requirealias[name]:
                yield msg('%s module must be "as" aliased to %s',
                          name, requirealias[name])

        elif isinstance(node, ast.ImportFrom):
            # Resolve the full imported module name.
            if node.level > 0:
                fullname = '.'.join(module.split('.')[:-node.level])
                if node.module:
                    fullname += '.%s' % node.module
            else:
                assert node.module
                fullname = node.module

                # An absolute import of a sibling should have been relative.
                if fullname.split('.')[0] == topmodule:
                    yield msg('import should be relative: %s', fullname)

            # __future__ is special since it needs to come first and use
            # symbol import.
            if fullname != '__future__':
                if not fullname or fullname in stdlib_modules:
                    yield msg('relative import of stdlib module')
                else:
                    seenlocal = fullname

            # Direct symbol import is only allowed from certain modules and
            # must occur before non-symbol imports.
            if node.module and node.col_offset == root_col_offset:
                found = fromlocal(node.module, node.level)
                if found and found[2]: # node.module is a package
                    # Names that resolve to sub modules are not symbols.
                    prefix = found[0] + '.'
                    symbols = [n.name for n in node.names
                               if not fromlocal(prefix + n.name)]
                else:
                    symbols = [n.name for n in node.names]

                if symbols and fullname not in allowsymbolimports:
                    yield msg('direct symbol import %s from %s',
                              ', '.join(symbols), fullname)

                if symbols and seennonsymbolrelative:
                    yield msg('symbol import follows non-symbol import: %s',
                              fullname)

            if not node.module:
                assert node.level
                seennonsymbolrelative = True

                # Only allow 1 group per level.
                if (node.level in seenlevels
                    and node.col_offset == root_col_offset):
                    yield msg('multiple "from %s import" statements',
                              '.' * node.level)

                # Higher-level groups come before lower-level groups.
                if any(node.level > l for l in seenlevels):
                    yield msg('higher-level import should come first: %s',
                              fullname)

                seenlevels.add(node.level)

            # Entries in "from .X import ( ... )" lists must be lexically
            # sorted.
            previous = None
            for alias in node.names:
                if previous and alias.name < previous:
                    yield msg('imports from %s not lexically sorted: %s < %s',
                              fullname, alias.name, previous)

                previous = alias.name

                if (alias.name in requirealias
                    and alias.asname != requirealias[alias.name]):
                    yield msg('%s from %s must be "as" aliased to %s',
                              alias.name, fullname, requirealias[alias.name])
def verify_stdlib_on_own_line(root):
    """Report "import" statements mixing stdlib and local modules.

    Walks the given tree and yields a ``(message, lineno)`` pair for every
    "import X, Y" statement naming both stdlib and non-stdlib modules.

    >>> list(verify_stdlib_on_own_line(ast.parse('import sys, foo')))
    [('mixed imports\\n stdlib: sys\\n relative: foo', 1)]
    >>> list(verify_stdlib_on_own_line(ast.parse('import sys, os')))
    []
    >>> list(verify_stdlib_on_own_line(ast.parse('import foo, bar')))
    []
    """
    for node in ast.walk(root):
        if not isinstance(node, ast.Import):
            continue
        stdlibnames = []
        localnames = []
        for alias in node.names:
            if alias.name in stdlib_modules:
                stdlibnames.append(alias.name)
            else:
                localnames.append(alias.name)
        if stdlibnames and localnames:
            report = 'mixed imports\n stdlib: %s\n relative: %s' % (
                ', '.join(sorted(stdlibnames)),
                ', '.join(sorted(localnames)))
            yield (report, node.lineno)
516 516
class CircularImport(Exception):
    """Raised by checkmod(); args[0] is the import path forming the cycle."""

def checkmod(mod, imports):
    """Search breadth-first for an import cycle leading back to ``mod``.

    ``imports`` maps a module name to the names it imports; modules missing
    from the mapping are treated as importing nothing.

    Returns None when no cycle through ``mod`` is found. Raises
    CircularImport, carrying the list of modules from ``mod`` to the module
    that imports it again, as soon as one is found.
    """
    # Length of the shortest path by which each module has been reached. A
    # module is only expanded again via a strictly shorter path, which is
    # what keeps the search finite.
    shortest = {}
    unreached = float('inf')
    # A deque gives O(1) pops from the front of the work queue.
    visit = collections.deque([[mod]])
    while visit:
        path = visit.popleft()
        for i in sorted(imports.get(path[-1], [])):
            if len(path) < shortest.get(i, unreached):
                shortest[i] = len(path)
                if i in path:
                    if i == path[0]:
                        raise CircularImport(path)
                    # A cycle that does not pass through ``mod`` is found
                    # when checking one of its own members instead.
                    continue
                visit.append(path + [i])
533 533
def rotatecycle(cycle):
    """Rotate a cycle so it starts, and ends, at its lowest-sorting module.

    The first module is repeated at the end to close the cycle.

    >>> rotatecycle(['foo', 'bar'])
    ['bar', 'foo', 'bar']
    """
    pivot = cycle.index(min(cycle))
    rotated = cycle[pivot:] + cycle[:pivot]
    rotated.append(rotated[0])
    return rotated
543 543
def find_cycles(imports):
    """Return the set of import cycles found in an import graph.

    All module names recorded in `imports` should be absolute ones. Each
    cycle is rendered as a string of module names joined by " -> ", starting
    and ending at its lexicographically first module.

    >>> from __future__ import print_function
    >>> imports = {'top.foo': ['top.bar', 'os.path', 'top.qux'],
    ...            'top.bar': ['top.baz', 'sys'],
    ...            'top.baz': ['top.foo'],
    ...            'top.qux': ['top.foo']}
    >>> print('\\n'.join(sorted(find_cycles(imports))))
    top.bar -> top.baz -> top.foo -> top.bar
    top.foo -> top.qux -> top.foo
    """
    found = set()
    for mod in sorted(imports):
        try:
            checkmod(mod, imports)
        except CircularImport as err:
            # Rotating makes the same cycle found from different starting
            # modules collapse into a single set entry.
            found.add(" -> ".join(rotatecycle(err.args[0])))
    return found
566 566
567 567 def _cycle_sortkey(c):
568 568 return len(c), c
569 569
def sources(f, modname):
    """Yield ``(source text, modname)`` for file ``f`` if it is a .py file.

    Nothing is yielded, and the file is not opened, for any other name.
    """
    if not f.endswith('.py'):
        return
    with open(f) as src:
        yield src.read(), modname
574 574
def main(argv):
    """Check the import conventions of the given source files.

    ``argv`` is a command line: the program name followed by either the
    source file paths, or a single '-' to read the paths from stdin, one
    per line.

    Every convention violation and import cycle is printed. Returns 1 on a
    usage error, otherwise True when any violation or cycle was reported
    and False when none was. Files that fail to parse are reported as
    SyntaxError lines but do not affect the return value.
    """
    if len(argv) < 2 or (argv[1] == '-' and len(argv) > 2):
        # Fall back to the real command line if argv is completely empty.
        prog = argv[0] if argv else sys.argv[0]
        print('Usage: %s {-|file [file] [file] ...}' % prog)
        return 1
    if argv[1] == '-':
        argv = argv[:1]
        argv.extend(l.rstrip() for l in sys.stdin.readlines())
    localmods = {}
    used_imports = {}
    any_errors = False
    # Every file named on the command line counts as a local module.
    for source_path in argv[1:]:
        modname = dotted_name_of_path(source_path, trimpure=True)
        localmods[modname] = source_path
    for localmodname, source_path in sorted(localmods.items()):
        for src, modname in sources(source_path, localmodname):
            try:
                used_imports[modname] = sorted(
                    imported_modules(src, modname, localmods,
                                     ignore_nested=True))
                for error, lineno in verify_import_convention(modname, src,
                                                              localmods):
                    any_errors = True
                    print('%s:%d: %s' % (source_path, lineno, error))
            except SyntaxError as e:
                # lineno may be None, which '%d' cannot format.
                print('%s:%d: SyntaxError: %s' %
                      (source_path, e.lineno or 0, e))
    cycles = find_cycles(used_imports)
    if cycles:
        firstmods = set()
        for c in sorted(cycles, key=_cycle_sortkey):
            first = c.split()[0]
            # As a rough cut, ignore any cycle that starts with the
            # same module as some other cycle. Otherwise we see lots
            # of cycles that are effectively duplicates.
            if first in firstmods:
                continue
            print('Import cycle:', c)
            firstmods.add(first)
        any_errors = True
    return any_errors != 0

if __name__ == '__main__':
    sys.exit(int(main(sys.argv)))
General Comments 0
You need to be logged in to leave comments. Login now