##// END OF EJS Templates
import-checker: list package directory as stdlib module...
Yuya Nishihara -
r27621:39845b06 default
parent child Browse files
Show More
@@ -1,610 +1,608
1 1 #!/usr/bin/env python
2 2
3 3 import ast
4 4 import collections
5 5 import os
6 6 import sys
7 7
8 8 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
9 9 # to work when run from a virtualenv. The modules were chosen empirically
10 10 # so that the return value matches the return value without virtualenv.
11 11 import BaseHTTPServer
12 12 import zlib
13 13
# Modules from which individual symbols (rather than the module object
# itself) may be imported directly.
allowsymbolimports = (
    '__future__',
    'mercurial.hgweb.common',
    'mercurial.hgweb.request',
    'mercurial.i18n',
    'mercurial.node',
)

# Modules that must be imported under an alias, mapped to the alias they
# have to use. Their plain names are commonly confused with ordinary
# variables, which creates aliasing and readability issues.
requirealias = {
    'ui': 'uimod',
}
28 28
def usingabsolute(root):
    """Report whether the parsed module `root` uses absolute imports.

    On Python 3 this is always True. Otherwise it is True only when the
    tree contains ``from __future__ import absolute_import`` somewhere.
    """
    if sys.version_info[0] >= 3:
        return True

    return any(
        alias.name == 'absolute_import'
        for stmt in ast.walk(root)
        if isinstance(stmt, ast.ImportFrom) and stmt.module == '__future__'
        for alias in stmt.names)
42 42
def walklocal(root):
    """Yield ``(node, newscope)`` for `root` and the nodes below it.

    `root` itself comes first, always with ``newscope`` False. Descendants
    follow in breadth-first order. A function definition is reported with
    ``newscope`` True and its children are not visited, so the walk never
    leaves the scope it started in.
    """
    pending = collections.deque(ast.iter_child_nodes(root))
    yield root, False
    while pending:
        current = pending.popleft()
        if isinstance(current, ast.FunctionDef):
            # A function body is a different scope: report it, don't enter.
            yield current, True
            continue
        pending.extend(ast.iter_child_nodes(current))
        yield current, False
53 53
def dotted_name_of_path(path, trimpure=False):
    """Convert a relative source file path into a dotted module name.

    Everything from the first dot of the file name onwards is dropped, as
    is a trailing ``module`` on the file name. With `trimpure`, every path
    component that is exactly ``pure`` is left out of the result.

    >>> dotted_name_of_path('mercurial/error.py')
    'mercurial.error'
    >>> dotted_name_of_path('mercurial/pure/parsers.py', trimpure=True)
    'mercurial.parsers'
    >>> dotted_name_of_path('zlibmodule.so')
    'zlib'
    """
    components = path.replace(os.sep, '/').split('/')
    # Cut at the first dot so .py, .so and .ARCH.so are all removed.
    leaf = components.pop().split('.', 1)[0]
    suffix = 'module'
    if leaf.endswith(suffix):
        leaf = leaf[:-len(suffix)]
    components.append(leaf)
    if trimpure:
        components = [c for c in components if c != 'pure']
    return '.'.join(components)
71 71
def fromlocalfunc(modulename, localmods):
    """Build a resolver that maps an imported name to a local module.

    `modulename` is the `dotted_name_of_path()`-ed path of the source
    file being examined; it may end in `.__init__`.

    `localmods` is a dict (or set) keyed by the absolute
    `dotted_name_of_path()`-ed paths of the locally defined (=
    Mercurial specific) modules.

    Names that cannot be found in `localmods` are assumed to come from
    the Python standard library.

    The returned function takes a `name` (which must not end in
    `.__init__`) and an optional relative import `level`. It returns
    False when `name` is not a local module, and otherwise the tuple
    `(absname, dottedpath, hassubmod)`:

    `absname` is the absolute module name of `name`
    (e.g. "hgext.convert"); it can be used as a prefix for sub modules.

    `dottedpath` is the `dotted_name_of_path()`-ed source file path
    (e.g. "hgext.convert.__init__") of `name`, usable as a key into
    `localmods`.

    `hassubmod` tells whether `name` may have sub modules (provided for
    convenience; it is equivalent to "absname != dottedpath").

    >>> localmods = {'foo.__init__': True, 'foo.foo1': True,
    ...              'foo.bar.__init__': True, 'foo.bar.bar1': True,
    ...              'baz.__init__': True, 'baz.baz1': True }
    >>> fromlocal = fromlocalfunc('foo.xxx', localmods)
    >>> # relative
    >>> fromlocal('foo1')
    ('foo.foo1', 'foo.foo1', False)
    >>> fromlocal('bar')
    ('foo.bar', 'foo.bar.__init__', True)
    >>> fromlocal('bar.bar1')
    ('foo.bar.bar1', 'foo.bar.bar1', False)
    >>> # absolute
    >>> fromlocal('baz')
    ('baz', 'baz.__init__', True)
    >>> fromlocal('baz.baz1')
    ('baz.baz1', 'baz.baz1', False)
    >>> # unknown = maybe standard library
    >>> fromlocal('os')
    False
    >>> fromlocal(None, 1)
    ('foo', 'foo.__init__', True)
    >>> fromlocal2 = fromlocalfunc('foo.xxx.yyy', localmods)
    >>> fromlocal2(None, 2)
    ('foo', 'foo.__init__', True)
    """
    segments = modulename.split('.')
    parent = '.'.join(segments[:-1])
    if parent:
        parent += '.'

    def fromlocal(name, level=0):
        if name is None:
            # "from . import x" style: there is no module name, so the
            # target is found by stripping `level` trailing components.
            assert level > 0
            candidates = ['.'.join(segments[:-level])]
        else:
            # The sibling (relative) spelling wins over the absolute one.
            candidates = [parent + name, name]

        for candidate in candidates:
            if candidate in localmods:
                return (candidate, candidate, False)
            initpath = candidate + '.__init__'
            if initpath in localmods:
                return (candidate, initpath, True)
        return False

    return fromlocal
151 151
def list_stdlib_modules():
    """List the modules present in the stdlib.

    This is a generator and may yield the same name more than once.
    A package is reported through its directory: an ``__init__`` file
    yields the dotted name of the directory that contains it.

    >>> mods = set(list_stdlib_modules())
    >>> 'BaseHTTPServer' in mods
    True

    os.path isn't really a module, so it's missing:

    >>> 'os.path' in mods
    False

    sys requires special treatment, because it's baked into the
    interpreter, but it should still appear:

    >>> 'sys' in mods
    True

    >>> 'collections' in mods
    True

    >>> 'cStringIO' in mods
    True
    """
    for m in sys.builtin_module_names:
        yield m
    # These modules only exist on windows, but we should always
    # consider them stdlib.
    for m in ['msvcrt', '_winreg']:
        yield m
    yield 'builtins'  # python3 only
    for m in 'fcntl', 'grp', 'pwd', 'termios':  # Unix only
        yield m
    stdlib_prefixes = set([sys.prefix, sys.exec_prefix])
    # We need to supplement the list of prefixes for the search to work
    # when run from within a virtualenv.
    for probe in (BaseHTTPServer, zlib):
        try:
            # Not all module objects have a __file__ attribute.
            filename = probe.__file__
        except AttributeError:
            continue
        dirname = os.path.dirname(filename)
        for prefix in stdlib_prefixes:
            if dirname.startswith(prefix):
                # Then this directory is redundant.
                break
        else:
            stdlib_prefixes.add(dirname)
    for libpath in sys.path:
        # We want to walk everything in sys.path that starts with
        # something in stdlib_prefixes. check-code suppressed because
        # the ast module used by this script implies the availability
        # of any().
        if not any(libpath.startswith(p) for p in stdlib_prefixes): # no-py24
            continue
        for top, dirs, files in os.walk(libpath):
            # Prune in place (back to front so indexes stay valid) so that
            # os.walk() only descends into packages, and never into a
            # top-level hgext/mercurial directory.
            for i, d in reversed(list(enumerate(dirs))):
                if (not os.path.exists(os.path.join(top, d, '__init__.py'))
                    or top == libpath and d in ('hgext', 'mercurial')):
                    del dirs[i]
            for name in files:
                if not name.endswith(('.py', '.so', '.pyc', '.pyo', '.pyd')):
                    continue
                if name.startswith('__init__.py'):
                    # The package itself is the module: list its directory.
                    full_path = top
                else:
                    full_path = os.path.join(top, name)
                rel_path = full_path[len(libpath) + 1:]
                yield dotted_name_of_path(rel_path)
225 223
# Computed once when the module is loaded; the verify_* checks test imported
# names for membership in this set to decide whether they are stdlib.
stdlib_modules = set(list_stdlib_modules())
227 225
def imported_modules(source, modulename, localmods, ignore_nested=False):
    """Yield the locally defined modules imported by `source`.

    Args:
      source: The python source to examine as a string.
      modulename: of specified python source (may have `__init__`)
      localmods: dict of locally defined module names (may have `__init__`)
      ignore_nested: If true, import statements that do not start in
                     column zero will be ignored.

    Yields:
      The `localmods` key of each imported local module, in the order
      encountered. Imports that do not resolve to a local module are
      taken to be standard library imports and are skipped.

    >>> modulename = 'foo.xxx'
    >>> localmods = {'foo.__init__': True,
    ...              'foo.foo1': True, 'foo.foo2': True,
    ...              'foo.bar.__init__': True, 'foo.bar.bar1': True,
    ...              'baz.__init__': True, 'baz.baz1': True }
    >>> # standard library (= not locally defined ones)
    >>> sorted(imported_modules(
    ...        'from stdlib1 import foo, bar; import stdlib2',
    ...        modulename, localmods))
    []
    >>> # relative importing
    >>> sorted(imported_modules(
    ...        'import foo1; from bar import bar1',
    ...        modulename, localmods))
    ['foo.bar.bar1', 'foo.foo1']
    >>> sorted(imported_modules(
    ...        'from bar.bar1 import name1, name2, name3',
    ...        modulename, localmods))
    ['foo.bar.bar1']
    >>> # absolute importing
    >>> sorted(imported_modules(
    ...        'from baz import baz1, name1',
    ...        modulename, localmods))
    ['baz.__init__', 'baz.baz1']
    >>> # mixed importing, even though it shouldn't be recommended
    >>> sorted(imported_modules(
    ...        'import stdlib, foo1, baz',
    ...        modulename, localmods))
    ['baz.__init__', 'foo.foo1']
    >>> # ignore_nested
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', modulename, localmods))
    ['foo.__init__', 'foo.bar.__init__']
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', modulename, localmods, ignore_nested=True))
    ['foo.__init__']
    """
    resolve = fromlocalfunc(modulename, localmods)
    for stmt in ast.walk(ast.parse(source)):
        if ignore_nested and getattr(stmt, 'col_offset', 0) > 0:
            continue

        if isinstance(stmt, ast.Import):
            for alias in stmt.names:
                hit = resolve(alias.name)
                # An unresolved name should be a standard library import.
                if hit:
                    yield hit[1]
            continue

        if not isinstance(stmt, ast.ImportFrom):
            continue

        hit = resolve(stmt.module, stmt.level)
        if not hit:
            # this should import standard library
            continue

        absname, dottedpath, hassubmod = hit
        if not hassubmod:
            # "dottedpath" is a plain module, so the imported names can
            # only be its attributes; there is no need to inspect them
            # e.g.: from mercurial.node import nullid, nullrev
            yield dottedpath
            continue

        nonmodule = False
        for alias in stmt.names:
            sub = resolve(absname + '.' + alias.name)
            if sub:
                yield sub[1]
            else:
                # this should be a function or a property of "stmt.module"
                nonmodule = True
        if nonmodule:
            # The package itself is needed to look up the non-module names.
            yield dottedpath
323 321
def verify_import_convention(module, source, localmods):
    """Check that the imports in `source` follow our coding convention.

    There are two conventions, legacy and modern. Which one applies is
    decided by `usingabsolute()`: sources using absolute imports are held
    to the modern convention, everything else to the legacy one.

    The legacy convention only looks for mixed imports. The modern
    convention is much more thorough.

    Returns the iterable of problems produced by the selected checker.
    """
    tree = ast.parse(source)
    if usingabsolute(tree):
        return verify_modern_convention(module, tree, localmods)
    return verify_stdlib_on_own_line(tree)
340 338
def verify_modern_convention(module, root, localmods, root_col_offset=0):
    """Check that `root` follows the modern import convention.

    Yields a ``(message, lineno)`` tuple for every violation found.
    `root_col_offset` is the column at which statements of the scope
    being checked are expected to start; several rules are only enforced
    on import statements that start exactly there.

    The rules of the modern convention are:

    * Ordering is stdlib followed by local imports. Each group is lexically
      sorted.
    * Importing multiple modules via "import X, Y" is not allowed: use
      separate import statements.
    * Importing multiple modules via "from X import ..." is allowed if using
      parenthesis and one entry per line.
    * Only 1 relative import statement per import level ("from .", "from ..")
      is allowed.
    * Relative imports from higher levels must occur before lower levels. e.g.
      "from .." must be before "from .".
    * Imports from peer packages should use relative import (e.g. do not
      "import mercurial.foo" from a "mercurial.*" module).
    * Symbols can only be imported from specific modules (see
      `allowsymbolimports`). For other modules, first import the module then
      assign the symbol to a module-level variable. In addition, these imports
      must be performed before other relative imports. This rule only
      applies to import statements outside of any blocks.
    * Relative imports from the standard library are not allowed.
    * Certain modules must be aliased to alternate names to avoid aliasing
      and readability problems. See `requirealias`.
    """
    topmodule = module.split('.')[0]
    fromlocal = fromlocalfunc(module, localmods)

    # Whether a local/non-stdlib import has been performed.
    seenlocal = False
    # Whether a relative, non-symbol import has been seen.
    seennonsymbolrelative = False
    # The last name to be imported (for sorting).
    lastname = None
    # Relative import levels encountered so far.
    seenlevels = set()

    for node, newscope in walklocal(root):
        if newscope:
            # A function body is checked as a scope of its own, with its
            # statements expected four columns further in.
            nested = verify_modern_convention(module, node, localmods,
                                              node.col_offset + 4)
            for problem in nested:
                yield problem
            continue

        if isinstance(node, ast.Import):
            lineno = node.lineno
            toplevel = node.col_offset == root_col_offset

            # Disallow "import foo, bar" and require separate imports
            # for each module.
            if len(node.names) > 1:
                yield ('multiple imported names: %s'
                       % (', '.join(n.name for n in node.names),), lineno)

            first = node.names[0]
            name, asname = first.name, first.asname

            # Ignore sorting rules on imports inside blocks.
            if toplevel and lastname and name < lastname:
                yield ('imports not lexically sorted: %s < %s'
                       % (name, lastname), lineno)

            lastname = name

            # stdlib imports should be before local imports.
            stdlib = name in stdlib_modules
            if stdlib and seenlocal and toplevel:
                yield ('stdlib import follows local import: %s' % (name,),
                       lineno)

            if not stdlib:
                seenlocal = True

            # Import of sibling modules should use relative imports.
            if name.split('.')[0] == topmodule:
                yield ('import should be relative: %s' % (name,), lineno)

            if name in requirealias and asname != requirealias[name]:
                yield ('%s module must be "as" aliased to %s'
                       % (name, requirealias[name]), lineno)
            continue

        if not isinstance(node, ast.ImportFrom):
            continue

        lineno = node.lineno
        toplevel = node.col_offset == root_col_offset

        # Resolve the full imported module name.
        if node.level > 0:
            fullname = '.'.join(module.split('.')[:-node.level])
            if node.module:
                fullname += '.%s' % node.module
        else:
            assert node.module
            fullname = node.module

            # An absolute import of a sibling should have been relative.
            if fullname.split('.')[0] == topmodule:
                yield ('import should be relative: %s' % (fullname,), lineno)

        # __future__ is special since it needs to come first and use
        # symbol import.
        if fullname != '__future__':
            if not fullname or fullname in stdlib_modules:
                yield ('relative import of stdlib module', lineno)
            else:
                seenlocal = True

        # Direct symbol import is only allowed from certain modules and
        # must occur before non-symbol imports.
        if node.module and toplevel:
            found = fromlocal(node.module, node.level)
            imported = [n.name for n in node.names]
            if found and found[2]:  # node.module is a package
                prefix = found[0] + '.'
                # Names that resolve to sub modules are not symbols.
                symbols = [s for s in imported if not fromlocal(prefix + s)]
            else:
                symbols = imported

            if symbols:
                if fullname not in allowsymbolimports:
                    yield ('direct symbol import %s from %s'
                           % (', '.join(symbols), fullname), lineno)

                if seennonsymbolrelative:
                    yield ('symbol import follows non-symbol import: %s'
                           % (fullname,), lineno)

        if not node.module:
            assert node.level
            seennonsymbolrelative = True

            # Only allow 1 group per level.
            if node.level in seenlevels and toplevel:
                yield ('multiple "from %s import" statements'
                       % ('.' * node.level,), lineno)

            # Higher-level groups come before lower-level groups.
            if any(node.level > l for l in seenlevels):
                yield ('higher-level import should come first: %s'
                       % (fullname,), lineno)

            seenlevels.add(node.level)

        # Entries in "from .X import ( ... )" lists must be lexically
        # sorted.
        previous = None
        for entry in node.names:
            if previous and entry.name < previous:
                yield ('imports from %s not lexically sorted: %s < %s'
                       % (fullname, entry.name, previous), lineno)

            previous = entry.name

            if (entry.name in requirealias
                and entry.asname != requirealias[entry.name]):
                yield ('%s from %s must be "as" aliased to %s'
                       % (entry.name, fullname, requirealias[entry.name]),
                       lineno)
494 492
def verify_stdlib_on_own_line(root):
    """Check that no import statement mixes stdlib and other modules.

    Every "import a, b, ..." statement in `root` that names at least one
    module found in `stdlib_modules` together with at least one module
    that is not yields a ``(message, lineno)`` tuple.

    Observing this limitation is important as it works around an
    annoying lib2to3 bug in relative import rewrites:
    http://bugs.python.org/issue19510.

    >>> list(verify_stdlib_on_own_line(ast.parse('import sys, foo')))
    [('mixed imports\\n stdlib: sys\\n relative: foo', 1)]
    >>> list(verify_stdlib_on_own_line(ast.parse('import sys, os')))
    []
    >>> list(verify_stdlib_on_own_line(ast.parse('import foo, bar')))
    []
    """
    for stmt in ast.walk(root):
        if not isinstance(stmt, ast.Import):
            continue
        stdlib = []
        relative = []
        for alias in stmt.names:
            if alias.name in stdlib_modules:
                stdlib.append(alias.name)
            else:
                relative.append(alias.name)
        if stdlib and relative:
            yield ('mixed imports\n stdlib: %s\n relative: %s' %
                   (', '.join(sorted(stdlib)),
                    ', '.join(sorted(relative))), stmt.lineno)
519 517
class CircularImport(Exception):
    """Raised by checkmod() with the offending import path as its argument."""
522 520
def checkmod(mod, imports):
    """Search the import graph for a cycle that leads back to `mod`.

    `imports` maps a module name to the names it imports; modules that
    are missing from it are treated as importing nothing. The graph is
    explored breadth-first, visiting each module's imports in sorted
    order, so the first cycle found is a shortest one.

    Returns None when `mod` is not part of any cycle.

    Raises CircularImport, carrying the path from `mod` to the module
    that imports it again, when `mod` is reachable from itself.
    """
    # Paths are dequeued in order of non-decreasing length, so the first
    # time a module is met is via a shortest path; reaching it again can
    # never be an improvement and is skipped.
    seen = set()
    pending = collections.deque([[mod]])
    while pending:
        path = pending.popleft()
        for name in sorted(imports.get(path[-1], [])):
            if name in seen:
                continue
            seen.add(name)
            # Every module on a path except the starting one is already in
            # `seen`, so an unseen module that closes a loop must be `mod`.
            if name == mod:
                raise CircularImport(path)
            pending.append(path + [name])
536 534
def rotatecycle(cycle):
    """Rotate `cycle` so its lexicographically smallest module comes first.

    The smallest module is also appended at the end, closing the loop.

    >>> rotatecycle(['foo', 'bar'])
    ['bar', 'foo', 'bar']
    """
    start = cycle.index(min(cycle))
    rotated = cycle[start:] + cycle[:start]
    rotated.append(cycle[start])
    return rotated
546 544
def find_cycles(imports):
    """Find cycles in an already-loaded import graph.

    All module names recorded in `imports` should be absolute one.

    Returns a set of strings, one per distinct cycle, each listing the
    modules of the cycle joined by " -> " and normalised by
    `rotatecycle()` so that the same cycle found from different starting
    modules is only recorded once.

    >>> imports = {'top.foo': ['top.bar', 'os.path', 'top.qux'],
    ...            'top.bar': ['top.baz', 'sys'],
    ...            'top.baz': ['top.foo'],
    ...            'top.qux': ['top.foo']}
    >>> print('\\n'.join(sorted(find_cycles(imports))))
    top.bar -> top.baz -> top.foo -> top.bar
    top.foo -> top.qux -> top.foo
    """
    cycles = set()
    # Iterating the mapping directly yields its keys on Python 2 and 3.
    for mod in sorted(imports):
        try:
            checkmod(mod, imports)
        except CircularImport as e:
            cycle = e.args[0]
            cycles.add(" -> ".join(rotatecycle(cycle)))
    return cycles
568 566
def _cycle_sortkey(c):
    """Sort key for cycle strings: shorter ones first, ties by text."""
    length = len(c)
    return length, c
571 569
def main(argv):
    """Check the given source files for import problems.

    `argv` is the program name followed either by the paths of the
    source files to check, or by a single '-' to read the paths from
    stdin, one per line.

    Convention violations are printed as "path:line: message" and import
    cycles as "Import cycle: ...". Returns 1 on a usage error; otherwise
    a true value if any problem was reported and a false one if not.
    """
    if len(argv) < 2 or (argv[1] == '-' and len(argv) > 2):
        # Guard against an entirely empty argv when naming the program.
        prog = argv[0] if argv else 'import-checker'
        print('Usage: %s {-|file [file] [file] ...}' % prog)
        return 1
    if argv[1] == '-':
        argv = argv[:1]
        # Blank lines would otherwise turn into empty, unopenable paths.
        argv.extend(l.rstrip() for l in sys.stdin.readlines() if l.strip())
    localmods = {}
    used_imports = {}
    any_errors = False
    for source_path in argv[1:]:
        modname = dotted_name_of_path(source_path, trimpure=True)
        localmods[modname] = source_path
    for modname, source_path in sorted(localmods.items()):
        # The context manager closes the file even if reading fails.
        with open(source_path) as f:
            src = f.read()
        used_imports[modname] = sorted(
            imported_modules(src, modname, localmods, ignore_nested=True))
        for error, lineno in verify_import_convention(modname, src, localmods):
            any_errors = True
            print('%s:%d: %s' % (source_path, lineno, error))
    cycles = find_cycles(used_imports)
    if cycles:
        firstmods = set()
        for c in sorted(cycles, key=_cycle_sortkey):
            first = c.split()[0]
            # As a rough cut, ignore any cycle that starts with the
            # same module as some other cycle. Otherwise we see lots
            # of cycles that are effectively duplicates.
            if first in firstmods:
                continue
            print('Import cycle: %s' % c)
            firstmods.add(first)
        any_errors = True
    return any_errors != 0
608 606
if __name__ == '__main__':
    # main() may return a bool; turn it into an integer exit status.
    exitcode = int(main(sys.argv))
    sys.exit(exitcode)
General Comments 0
You need to be logged in to leave comments. Login now