##// END OF EJS Templates
import-checker: make it executable for convenience
Yuya Nishihara -
r26954:f804bf27 default
parent child Browse files
Show More
@@ -1,569 +1,571
1 #!/usr/bin/env python
2
1 3 import ast
2 4 import os
3 5 import sys
4 6
5 7 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
6 8 # to work when run from a virtualenv. The modules were chosen empirically
7 9 # so that the return value matches the return value without virtualenv.
8 10 import BaseHTTPServer
9 11 import zlib
10 12
# Modules from which individual symbols may be imported directly
# ("from X import name") under the modern import convention.
allowsymbolimports = ('__future__', 'mercurial.i18n', 'mercurial.node')
17 19
# Maps a module name to the alias it has to be imported under. These
# modules share their name with commonly used variables, so importing them
# unaliased creates shadowing and readability problems.
requirealias = {'ui': 'uimod'}
23 25
def usingabsolute(root):
    """Report whether the parsed module `root` uses absolute imports.

    On Python 3 this is always true. Otherwise it is true only when the
    tree contains ``from __future__ import absolute_import`` somewhere.
    """
    if sys.version_info[0] >= 3:
        return True

    futureimports = (stmt for stmt in ast.walk(root)
                     if isinstance(stmt, ast.ImportFrom)
                     and stmt.module == '__future__')
    return any(alias.name == 'absolute_import' # no-py24
               for stmt in futureimports
               for alias in stmt.names)
37 39
def dotted_name_of_path(path, trimpure=False):
    """Given a relative path to a source file, return its dotted module name.

    `path` may use either '/' or the platform's native separator
    (``os.sep``) between components. Everything after the first '.' of the
    file name is dropped, as is a trailing "module" (the naming scheme of C
    extension sources). When `trimpure` is true, path components named
    "pure" are left out of the result.

    >>> dotted_name_of_path('mercurial/error.py')
    'mercurial.error'
    >>> dotted_name_of_path('mercurial/pure/parsers.py', trimpure=True)
    'mercurial.parsers'
    >>> dotted_name_of_path('zlibmodule.so')
    'zlib'
    """
    # Normalize native separators first, so that paths produced on
    # platforms where os.sep is not '/' are split into components too.
    parts = path.replace(os.sep, '/').split('/')
    parts[-1] = parts[-1].split('.', 1)[0] # remove .py and .so and .ARCH.so
    if parts[-1].endswith('module'):
        parts[-1] = parts[-1][:-6]
    if trimpure:
        return '.'.join(p for p in parts if p != 'pure')
    return '.'.join(parts)
55 57
def fromlocalfunc(modulename, localmods):
    """Build a resolver that maps an imported name to a local module.

    `modulename` is the `dotted_name_of_path()`-ed source file path of the
    source being examined; it may end with `.__init__`.

    `localmods` is a dict (or set) whose keys are the absolute
    `dotted_name_of_path()`-ed source file paths of the locally defined
    (= Mercurial specific) modules.

    Module names that are missing from `localmods` are assumed to come
    from the Python standard library.

    The returned function takes a `name` (without `.__init__`) and an
    optional relative import `level`. It returns an
    `(absname, dottedpath, hassubmod)` tuple if `name` matches a locally
    defined module, and False otherwise.

    `absname` is the absolute module name of `name`
    (e.g. "hgext.convert"). It can be used to compose the prefix of sub
    modules.

    `dottedpath` is the `dotted_name_of_path()`-ed source file path
    (e.g. "hgext.convert.__init__") of `name`. It is the key to look the
    module up in `localmods` again.

    `hassubmod` tells whether the module may have sub modules under it
    (provided for convenience; it is equivalent to
    "absname != dottedpath").

    >>> localmods = {'foo.__init__': True, 'foo.foo1': True,
    ...              'foo.bar.__init__': True, 'foo.bar.bar1': True,
    ...              'baz.__init__': True, 'baz.baz1': True }
    >>> fromlocal = fromlocalfunc('foo.xxx', localmods)
    >>> # relative
    >>> fromlocal('foo1')
    ('foo.foo1', 'foo.foo1', False)
    >>> fromlocal('bar')
    ('foo.bar', 'foo.bar.__init__', True)
    >>> fromlocal('bar.bar1')
    ('foo.bar.bar1', 'foo.bar.bar1', False)
    >>> # absolute
    >>> fromlocal('baz')
    ('baz', 'baz.__init__', True)
    >>> fromlocal('baz.baz1')
    ('baz.baz1', 'baz.baz1', False)
    >>> # unknown = maybe standard library
    >>> fromlocal('os')
    False
    >>> fromlocal(None, 1)
    ('foo', 'foo.__init__', True)
    >>> fromlocal2 = fromlocalfunc('foo.xxx.yyy', localmods)
    >>> fromlocal2(None, 2)
    ('foo', 'foo.__init__', True)
    """
    modparts = modulename.split('.')
    package = '.'.join(modparts[:-1])
    relprefix = ''
    if package:
        relprefix = package + '.'

    def fromlocal(name, level=0):
        if name is not None:
            # The name relative to the current package wins over the
            # absolute interpretation of the same name.
            lookups = (relprefix + name, name)
        else:
            # "from . import x" style: there is no module name, only a
            # level, which therefore can not denote an absolute import.
            assert level > 0
            lookups = ('.'.join(modparts[:-level]),)

        for candidate in lookups:
            if candidate in localmods:
                return (candidate, candidate, False)
            initpath = candidate + '.__init__'
            if initpath in localmods:
                return (candidate, initpath, True)
        return False

    return fromlocal
135 137
def list_stdlib_modules():
    """Generate the names of the modules that make up the stdlib.

    >>> mods = set(list_stdlib_modules())
    >>> 'BaseHTTPServer' in mods
    True

    os.path isn't really a module, so it's missing:

    >>> 'os.path' in mods
    False

    sys requires special treatment, because it's baked into the
    interpreter, but it should still appear:

    >>> 'sys' in mods
    True

    >>> 'collections' in mods
    True

    >>> 'cStringIO' in mods
    True
    """
    for builtin in sys.builtin_module_names:
        yield builtin

    # Names that the filesystem scan below can miss, but that should
    # always count as stdlib.
    alwaysstdlib = (
        'msvcrt', '_winreg',                      # Windows only
        'ctypes', 'email', 'multiprocessing',     # these get missed too
        'builtins',                               # python3 only
        'fcntl', 'grp', 'pwd', 'termios',         # Unix only
    )
    for extra in alwaysstdlib:
        yield extra

    roots = set([sys.prefix, sys.exec_prefix])
    # Inside a virtualenv the real stdlib may live outside of both
    # prefixes, so add the directories the probe modules were loaded from
    # unless an already known root covers them.
    for probe in (BaseHTTPServer, zlib):
        try:
            # Not all module objects have a __file__ attribute.
            probefile = probe.__file__
        except AttributeError:
            continue
        probedir = os.path.dirname(probefile)
        # check-code suppressed because the ast module used by this
        # script implies the availability of any().
        if not any(probedir.startswith(r) for r in roots): # no-py24
            roots.add(probedir)

    stdlibsuffixes = ('.py', '.so', '.pyc', '.pyo', '.pyd')
    for libpath in sys.path:
        # Only walk the sys.path entries located below one of the roots.
        if not any(libpath.startswith(r) for r in roots): # no-py24
            continue
        for top, dirs, files in os.walk(libpath):
            # Prune in place so that os.walk() only descends into
            # packages, and never into Mercurial's own top-level packages.
            dirs[:] = [
                d for d in dirs
                if os.path.exists(os.path.join(top, d, '__init__.py'))
                and not (top == libpath and d in ('hgext', 'mercurial'))]
            for name in files:
                if name != '__init__.py' and name.endswith(stdlibsuffixes):
                    full_path = os.path.join(top, name)
                    yield dotted_name_of_path(full_path[len(libpath) + 1:])
209 211
# Names of all modules treated as standard library by the checks below.
# Evaluated once, when this file is loaded.
stdlib_modules = set(list_stdlib_modules())
211 213
def imported_modules(source, modulename, localmods, ignore_nested=False):
    """Yield the locally defined modules imported by some python source.

    Args:
      source: The python source to examine as a string.
      modulename: of specified python source (may have `__init__`)
      localmods: dict of locally defined module names (may have `__init__`)
      ignore_nested: If true, import statements that do not start in
                     column zero will be ignored.

    Yields:
      The `localmods` key of each imported local module, in the order the
      imports are encountered. Imports that do not resolve to a local
      module are skipped, since they should be standard library ones.

    >>> modulename = 'foo.xxx'
    >>> localmods = {'foo.__init__': True,
    ...              'foo.foo1': True, 'foo.foo2': True,
    ...              'foo.bar.__init__': True, 'foo.bar.bar1': True,
    ...              'baz.__init__': True, 'baz.baz1': True }
    >>> # standard library (= not locally defined ones)
    >>> sorted(imported_modules(
    ...        'from stdlib1 import foo, bar; import stdlib2',
    ...        modulename, localmods))
    []
    >>> # relative importing
    >>> sorted(imported_modules(
    ...        'import foo1; from bar import bar1',
    ...        modulename, localmods))
    ['foo.bar.__init__', 'foo.bar.bar1', 'foo.foo1']
    >>> sorted(imported_modules(
    ...        'from bar.bar1 import name1, name2, name3',
    ...        modulename, localmods))
    ['foo.bar.bar1']
    >>> # absolute importing
    >>> sorted(imported_modules(
    ...        'from baz import baz1, name1',
    ...        modulename, localmods))
    ['baz.__init__', 'baz.baz1']
    >>> # mixed importing, even though it shouldn't be recommended
    >>> sorted(imported_modules(
    ...        'import stdlib, foo1, baz',
    ...        modulename, localmods))
    ['baz.__init__', 'foo.foo1']
    >>> # ignore_nested
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', modulename, localmods))
    ['foo.__init__', 'foo.bar.__init__']
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', modulename, localmods, ignore_nested=True))
    ['foo.__init__']
    """
    resolve = fromlocalfunc(modulename, localmods)
    for stmt in ast.walk(ast.parse(source)):
        if ignore_nested and getattr(stmt, 'col_offset', 0) > 0:
            continue

        if isinstance(stmt, ast.Import):
            for alias in stmt.names:
                hit = resolve(alias.name)
                if hit:
                    yield hit[1]
            continue

        if not isinstance(stmt, ast.ImportFrom):
            continue

        hit = resolve(stmt.module, stmt.level)
        if not hit:
            # this should import standard library
            continue

        absname, dottedpath, hassubmod = hit
        yield dottedpath
        if hassubmod:
            # Each imported name may be a sub module of the package. Names
            # that do not resolve should be functions or properties of it.
            # For a plain module (e.g. "from mercurial.node import nullid,
            # nullrev") looking at the names would be redundant.
            for alias in stmt.names:
                sub = resolve(absname + '.' + alias.name)
                if sub:
                    yield sub[1]
300 302
def verify_import_convention(module, source):
    """Check the imports of `source` against our coding convention.

    There are 2 conventions: legacy and modern. Sources that use absolute
    imports are held to the modern one, which is much more thorough. All
    other sources are held to the legacy one, which only looks for mixed
    imports.

    Returns an iterable of the problems found.
    """
    tree = ast.parse(source)
    if usingabsolute(tree):
        return verify_modern_convention(module, tree)
    return verify_stdlib_on_own_line(tree)
317 319
def verify_modern_convention(module, root):
    """Verify a file conforms to the modern import convention rules.

    `module` is the dotted name of the module being checked and `root` is
    its parsed AST. This is a generator: it yields one message string per
    violation found and yields nothing for a conforming file.

    The rules of the modern convention are:

    * Ordering is stdlib followed by local imports. Each group is lexically
      sorted.
    * Importing multiple modules via "import X, Y" is not allowed: use
      separate import statements.
    * Importing multiple modules via "from X import ..." is allowed if using
      parenthesis and one entry per line.
    * Only 1 relative import statement per import level ("from .", "from ..")
      is allowed.
    * Relative imports from higher levels must occur before lower levels. e.g.
      "from .." must be before "from .".
    * Imports from peer packages should use relative import (e.g. do not
      "import mercurial.foo" from a "mercurial.*" module).
    * Symbols can only be imported from specific modules (see
      `allowsymbolimports`). For other modules, first import the module then
      assign the symbol to a module-level variable. In addition, these imports
      must be performed before other relative imports. This rule only
      applies to import statements outside of any blocks.
    * Relative imports from the standard library are not allowed.
    * Certain modules must be aliased to alternate names to avoid aliasing
      and readability problems. See `requirealias`.
    """
    # Top-level package of the module being checked; imports whose name
    # starts with it are imports of sibling modules.
    topmodule = module.split('.')[0]

    # Whether a local/non-stdlib import has been performed.
    seenlocal = False
    # Whether a relative, non-symbol import has been seen.
    seennonsymbolrelative = False
    # The last name to be imported (for sorting).
    lastname = None
    # Relative import levels encountered so far.
    seenlevels = set()

    for node in ast.walk(root):
        if isinstance(node, ast.Import):
            # Disallow "import foo, bar" and require separate imports
            # for each module.
            if len(node.names) > 1:
                yield 'multiple imported names: %s' % ', '.join(
                    n.name for n in node.names)

            # Only the first name of the statement takes part in the
            # remaining checks of this branch.
            name = node.names[0].name
            asname = node.names[0].asname

            # Ignore sorting rules on imports inside blocks.
            if node.col_offset == 0:
                if lastname and name < lastname:
                    yield 'imports not lexically sorted: %s < %s' % (
                        name, lastname)

                lastname = name

            # stdlib imports should be before local imports.
            stdlib = name in stdlib_modules
            if stdlib and seenlocal and node.col_offset == 0:
                yield 'stdlib import follows local import: %s' % name

            if not stdlib:
                seenlocal = True

            # Import of sibling modules should use relative imports.
            topname = name.split('.')[0]
            if topname == topmodule:
                yield 'import should be relative: %s' % name

            if name in requirealias and asname != requirealias[name]:
                yield '%s module must be "as" aliased to %s' % (
                    name, requirealias[name])

        elif isinstance(node, ast.ImportFrom):
            # Resolve the full imported module name.
            if node.level > 0:
                # Relative import: drop one trailing component of `module`
                # per level, then append the named module, if any.
                fullname = '.'.join(module.split('.')[:-node.level])
                if node.module:
                    fullname += '.%s' % node.module
            else:
                assert node.module
                fullname = node.module

                # An absolute "from" import of a sibling module.
                topname = fullname.split('.')[0]
                if topname == topmodule:
                    yield 'import should be relative: %s' % fullname

            # __future__ is special since it needs to come first and use
            # symbol import.
            if fullname != '__future__':
                if not fullname or fullname in stdlib_modules:
                    yield 'relative import of stdlib module'
                else:
                    seenlocal = True

            # Direct symbol import is only allowed from certain modules and
            # must occur before non-symbol imports.
            if node.module and node.col_offset == 0:
                if fullname not in allowsymbolimports:
                    yield 'direct symbol import from %s' % fullname

                if seennonsymbolrelative:
                    yield ('symbol import follows non-symbol import: %s' %
                           fullname)

            # No module name means a pure relative import such as
            # "from . import x" or "from .. import y".
            if not node.module:
                assert node.level
                seennonsymbolrelative = True

                # Only allow 1 group per level.
                if node.level in seenlevels and node.col_offset == 0:
                    yield 'multiple "from %s import" statements' % (
                        '.' * node.level)

                # Higher-level groups come before lower-level groups.
                if any(node.level > l for l in seenlevels):
                    yield 'higher-level import should come first: %s' % (
                        fullname)

                seenlevels.add(node.level)

            # Entries in "from .X import ( ... )" lists must be lexically
            # sorted.
            lastentryname = None

            for n in node.names:
                if lastentryname and n.name < lastentryname:
                    yield 'imports from %s not lexically sorted: %s < %s' % (
                        fullname, n.name, lastentryname)

                lastentryname = n.name

                if n.name in requirealias and n.asname != requirealias[n.name]:
                    yield '%s from %s must be "as" aliased to %s' % (
                        n.name, fullname, requirealias[n.name])
453 455
def verify_stdlib_on_own_line(root):
    """Check that no "import" statement mixes stdlib and local modules.

    `root` is the parsed source. One message is yielded for every
    ``import a, b`` statement that names both stdlib modules and relative
    local modules.

    Observing this limitation is important as it works around an
    annoying lib2to3 bug in relative import rewrites:
    http://bugs.python.org/issue19510.

    >>> list(verify_stdlib_on_own_line(ast.parse('import sys, foo')))
    ['mixed imports\\n stdlib: sys\\n relative: foo']
    >>> list(verify_stdlib_on_own_line(ast.parse('import sys, os')))
    []
    >>> list(verify_stdlib_on_own_line(ast.parse('import foo, bar')))
    []
    """
    for stmt in ast.walk(root):
        if not isinstance(stmt, ast.Import):
            continue

        stdlib = []
        local = []
        for alias in stmt.names:
            if alias.name in stdlib_modules:
                stdlib.append(alias.name)
            else:
                local.append(alias.name)

        if stdlib and local:
            yield ('mixed imports\n stdlib: %s\n relative: %s' %
                   (', '.join(sorted(stdlib)), ', '.join(sorted(local))))
478 480
class CircularImport(Exception):
    """Raised by checkmod() when a module transitively imports itself.

    The single argument is the import path, a list of module names, that
    starts at the module and whose last entry imports the module again.
    """

def checkmod(mod, imports):
    """Raise CircularImport if `mod` takes part in an import cycle.

    `imports` maps a module name to an iterable of the module names it
    imports; modules missing from it are treated as importing nothing.

    The import graph is searched breadth-first starting at `mod`, visiting
    the imports of each module in sorted order, so the path reported in
    the exception is a shortest one. Cycles that are reachable from `mod`
    but do not lead back to it are not reported. Returns None if `mod` is
    not part of any cycle.
    """
    import collections

    # Length of the shortest path found so far on which each module is
    # imported. A module is only expanded again via a strictly shorter
    # path, which bounds the search without any limit on path length.
    shortest = {}
    # FIFO queue of the import paths still to be extended.
    visit = collections.deque([[mod]])
    while visit:
        path = visit.popleft()
        for i in sorted(imports.get(path[-1], [])):
            if i in shortest and shortest[i] <= len(path):
                continue
            shortest[i] = len(path)
            if i in path:
                if i == path[0]:
                    raise CircularImport(path)
                # A cycle that does not pass through `mod`: stop here.
                continue
            visit.append(path + [i])
495 497
def rotatecycle(cycle):
    """Rotate a cycle so that it starts at its lexicographically first module.

    The starting module is repeated at the end of the returned list to
    close the cycle.

    >>> rotatecycle(['foo', 'bar'])
    ['bar', 'foo', 'bar']
    """
    pivot = min(cycle)
    split = cycle.index(pivot)
    head, tail = cycle[split:], cycle[:split]
    return head + tail + [pivot]
505 507
def find_cycles(imports):
    """Collect the import cycles of an already-loaded import graph.

    All module names recorded in `imports` should be absolute ones.

    Returns a set of strings, one per distinct cycle, each listing the
    modules of the cycle joined by " -> " and starting (and ending) at the
    lexicographically first module of the cycle.

    >>> imports = {'top.foo': ['top.bar', 'os.path', 'top.qux'],
    ...            'top.bar': ['top.baz', 'sys'],
    ...            'top.baz': ['top.foo'],
    ...            'top.qux': ['top.foo']}
    >>> print '\\n'.join(sorted(find_cycles(imports)))
    top.bar -> top.baz -> top.foo -> top.bar
    top.foo -> top.qux -> top.foo
    """
    found = set()
    for modname in sorted(imports):
        try:
            checkmod(modname, imports)
        except CircularImport as exc:
            # Rotating makes the same cycle, discovered from different
            # starting modules, collapse into a single set entry.
            found.add(" -> ".join(rotatecycle(exc.args[0])))
    return found
527 529
528 530 def _cycle_sortkey(c):
529 531 return len(c), c
530 532
def main(argv):
    """Check the imports of the given source files and report problems.

    `argv` is the full command line: the program name followed by either
    the paths of the source files to check, or a single '-' to read the
    paths from stdin, one per line (blank lines are ignored).

    Violations of the import convention and import cycles between the
    given files are printed to stdout.

    Returns 1 after printing a usage message if the arguments are invalid.
    Otherwise returns True if any problem was reported and False if not.
    """
    # NOTE: print is always called with a single parenthesized argument,
    # which is valid both as a print statement and as a print() call.
    if len(argv) < 2 or (argv[1] == '-' and len(argv) > 2):
        progname = argv[0] if argv else 'import-checker'
        print('Usage: %s {-|file [file] [file] ...}' % progname)
        return 1
    if argv[1] == '-':
        argv = argv[:1]
        stripped = (l.rstrip() for l in sys.stdin.readlines())
        # A blank line is not a path; opening it would only fail later.
        argv.extend(path for path in stripped if path)
    localmods = {}
    used_imports = {}
    any_errors = False
    # All module names have to be known before any import is resolved.
    for source_path in argv[1:]:
        modname = dotted_name_of_path(source_path, trimpure=True)
        localmods[modname] = source_path
    for modname, source_path in sorted(localmods.items()):
        # The with block closes the file even if reading it fails.
        with open(source_path) as f:
            src = f.read()
        used_imports[modname] = sorted(
            imported_modules(src, modname, localmods, ignore_nested=True))
        for error in verify_import_convention(modname, src):
            any_errors = True
            print('%s %s' % (source_path, error))
    cycles = find_cycles(used_imports)
    if cycles:
        firstmods = set()
        for c in sorted(cycles, key=_cycle_sortkey):
            first = c.split()[0]
            # As a rough cut, ignore any cycle that starts with the
            # same module as some other cycle. Otherwise we see lots
            # of cycles that are effectively duplicates.
            if first in firstmods:
                continue
            print('Import cycle: %s' % c)
            firstmods.add(first)
        any_errors = True
    return any_errors
567 569
# Script entry point: run the checker on the command line arguments.
if __name__ == '__main__':
    # main() returns either an int or a bool; int() turns both into a
    # process exit status (non-zero when there was a problem).
    sys.exit(int(main(sys.argv)))
General Comments 0
You need to be logged in to leave comments. Login now