##// END OF EJS Templates
check-module-imports: ignore non-stdlib module installed by distribution...
marmoute -
r48602:42e2cdb5 stable
parent child Browse files
Show More
@@ -1,821 +1,823
1 1 #!/usr/bin/env python3
2 2
3 3 from __future__ import absolute_import, print_function
4 4
5 5 import ast
6 6 import collections
7 7 import io
8 8 import os
9 9 import sys
10 10
11 11 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
12 12 # to work when run from a virtualenv. The modules were chosen empirically
13 13 # so that the return value matches the return value without virtualenv.
14 14 if True: # disable lexical sorting checks
15 15 try:
16 16 import BaseHTTPServer as basehttpserver
17 17 except ImportError:
18 18 basehttpserver = None
19 19 import zlib
20 20
21 21 import testparseutil
22 22
# Modules from which individual symbols may be imported directly
# ("from X import name") without tripping the symbol-import check.
allowsymbolimports = (
    '__future__',
    'breezy',
    'hgclient',
    'mercurial',
    'mercurial.hgweb.common',
    'mercurial.hgweb.request',
    'mercurial.i18n',
    'mercurial.interfaces',
    'mercurial.node',
    'mercurial.pycompat',
    # for revlog to re-export constant to extensions
    'mercurial.revlogutils.constants',
    'mercurial.revlogutils.flagutil',
    # for cffi modules to re-export pure functions
    'mercurial.pure.base85',
    'mercurial.pure.bdiff',
    'mercurial.pure.mpatch',
    'mercurial.pure.osutil',
    'mercurial.pure.parsers',
    # third-party imports should be directly imported
    'mercurial.thirdparty',
    'mercurial.thirdparty.attr',
    'mercurial.thirdparty.zope',
    'mercurial.thirdparty.zope.interface',
)

# Symbol names that may be imported directly from any module.
directsymbols = ('demandimport',)

# Maps a module name to the alias it must be imported under. These
# modules are commonly confused with common variables, so importing them
# unaliased creates aliasing and readability issues.
requirealias = {
    'ui': 'uimod',
}
59 59
60 60
def usingabsolute(root):
    """Report whether the parsed module `root` uses absolute imports.

    On Python 3 this is always true. Otherwise it is true only when the
    tree contains ``from __future__ import absolute_import``.
    """
    if sys.version_info[0] >= 3:
        return True

    return any(
        alias.name == 'absolute_import'
        for stmt in ast.walk(root)
        if isinstance(stmt, ast.ImportFrom) and stmt.module == '__future__'
        for alias in stmt.names
    )
74 74
75 75
def walklocal(root):
    """Yield `root` and its descendants without entering nested functions.

    Each item is a ``(node, newscope)`` pair. `newscope` is True for an
    ``ast.FunctionDef`` node; such a node is yielded but its children are
    not visited. Nodes are produced breadth-first, `root` first.
    """
    pending = collections.deque(ast.iter_child_nodes(root))
    yield root, False
    while pending:
        current = pending.popleft()
        opens_scope = isinstance(current, ast.FunctionDef)
        if not opens_scope:
            # Only keep descending while we stay in the same scope.
            pending.extend(ast.iter_child_nodes(current))
        yield current, opens_scope
86 86
87 87
def dotted_name_of_path(path):
    """Given a relative path to a source file, return its dotted module name.

    Everything after the first dot of the file name is dropped, as is a
    trailing ``module`` on the remaining base name.

    >>> dotted_name_of_path('mercurial/error.py')
    'mercurial.error'
    >>> dotted_name_of_path('zlibmodule.so')
    'zlib'
    """
    components = path.replace(os.sep, '/').split('/')
    # remove .py and .so and .ARCH.so
    leaf = components.pop().split('.', 1)[0]
    suffix = 'module'
    if leaf.endswith(suffix):
        leaf = leaf[: -len(suffix)]
    components.append(leaf)
    return '.'.join(components)
101 101
102 102
def fromlocalfunc(modulename, localmods):
    """Build a resolver that maps an imported name to a local module.

    `modulename` is the `dotted_name_of_path()`-ed source file path of the
    importing source; it may end in `.__init__`. Bytes are decoded as
    ASCII.

    `localmods` is a container of absolute `dotted_name_of_path()`-ed
    source file paths of locally defined (= Mercurial specific) modules.
    Names not found in `localmods` are assumed to come from the Python
    standard library.

    The returned function takes `name` (without `.__init__`) and an
    optional relative-import `level`. It returns False when `name` does
    not match a local module, and otherwise a tuple
    `(absname, dottedpath, hassubmod)`:

    `absname` is the absolute module name of `name` (e.g.
    "hgext.convert"), usable as a prefix for sub modules.

    `dottedpath` is the `dotted_name_of_path()`-ed source file path of
    `name` (e.g. "hgext.convert.__init__"), usable to look the module up
    in `localmods` again.

    `hassubmod` tells whether `name` is a package and so may have sub
    modules (equivalent to "absname != dottedpath").

    >>> localmods = {'foo.__init__', 'foo.foo1',
    ...              'foo.bar.__init__', 'foo.bar.bar1',
    ...              'baz.__init__', 'baz.baz1'}
    >>> fromlocal = fromlocalfunc('foo.xxx', localmods)
    >>> # relative
    >>> fromlocal('foo1')
    ('foo.foo1', 'foo.foo1', False)
    >>> fromlocal('bar')
    ('foo.bar', 'foo.bar.__init__', True)
    >>> fromlocal('bar.bar1')
    ('foo.bar.bar1', 'foo.bar.bar1', False)
    >>> # absolute
    >>> fromlocal('baz')
    ('baz', 'baz.__init__', True)
    >>> fromlocal('baz.baz1')
    ('baz.baz1', 'baz.baz1', False)
    >>> # unknown = maybe standard library
    >>> fromlocal('os')
    False
    >>> fromlocal(None, 1)
    ('foo', 'foo.__init__', True)
    >>> fromlocal('foo1', 1)
    ('foo.foo1', 'foo.foo1', False)
    >>> fromlocal2 = fromlocalfunc('foo.xxx.yyy', localmods)
    >>> fromlocal2(None, 2)
    ('foo', 'foo.__init__', True)
    >>> fromlocal2('bar2', 1)
    False
    >>> fromlocal2('bar', 2)
    ('foo.bar', 'foo.bar.__init__', True)
    """
    if not isinstance(modulename, str):
        modulename = modulename.decode('ascii')
    modparts = modulename.split('.')
    # Dotted name of the enclosing package, with a trailing dot if any.
    prefix = '.'.join(modparts[:-1])
    if prefix:
        prefix += '.'

    def fromlocal(name, level=0):
        if name and not level:
            # Plain "import name": check the sibling-relative name first.
            candidates = [prefix + name, name]
        else:
            if not name:
                # A missing name means "from . import x"; that is only
                # valid for an explicit relative level.
                assert level > 0
            ancestor = '.'.join(modparts[:-level])
            candidates = [ancestor + '.' + name] if name else [ancestor]

        for candidate in candidates:
            if candidate in localmods:
                return (candidate, candidate, False)
            package = candidate + '.__init__'
            if package in localmods:
                return (candidate, package, True)
        return False

    return fromlocal
196 196
197 197
def populateextmods(localmods):
    """Populate C extension modules based on pure modules

    Returns a new set holding every name in `localmods` plus, for each
    ``mercurial.pure.X`` entry, ``mercurial.cext.X`` and
    ``mercurial.cffi._X``. `localmods` itself is not modified.
    """
    pure_prefix = 'mercurial.pure.'
    expanded = set(localmods)
    for modname in localmods:
        if not modname.startswith(pure_prefix):
            continue
        base = modname[len(pure_prefix) :]
        expanded.update(('mercurial.cext.' + base, 'mercurial.cffi._' + base))
    return expanded
207 207
208 208
def list_stdlib_modules():
    """List the modules present in the stdlib.

    Yields module names from three sources, in order: the interpreter's
    built-in modules, a fixed list of names that are always treated as
    stdlib, and every module found by walking the ``sys.path`` entries
    that live under a stdlib prefix. Names may be yielded more than once.

    >>> py3 = sys.version_info[0] >= 3
    >>> mods = set(list_stdlib_modules())
    >>> 'BaseHTTPServer' in mods or py3
    True

    os.path isn't really a module, so it's missing:

    >>> 'os.path' in mods
    False

    sys requires special treatment, because it's baked into the
    interpreter, but it should still appear:

    >>> 'sys' in mods
    True

    >>> 'collections' in mods
    True

    >>> 'cStringIO' in mods or py3
    True

    >>> 'cffi' in mods
    True
    """
    for m in sys.builtin_module_names:
        yield m

    always_stdlib = (
        # These modules only exist on windows, but we should always
        # consider them stdlib.
        'msvcrt',
        '_winreg',
        '__builtin__',
        'builtins',  # python3 only
        'importlib.abc',  # python3 only
        'importlib.machinery',  # python3 only
        'importlib.util',  # python3 only
        # Unix only
        'fcntl',
        'grp',
        'pwd',
        'termios',
        # in Python (not C) on PyPy
        'cPickle',
        'datetime',
        'cffi',
    )
    for m in always_stdlib:
        yield m

    stdlib_prefixes = {sys.prefix, sys.exec_prefix}
    # We need to supplement the list of prefixes for the search to work
    # when run from within a virtualenv.
    for mod in (basehttpserver, zlib):
        if mod is None:
            continue
        try:
            # Not all module objects have a __file__ attribute.
            filename = mod.__file__
        except AttributeError:
            continue
        dirname = os.path.dirname(filename)
        # A directory already covered by a known prefix is redundant.
        if not any(dirname.startswith(prefix) for prefix in stdlib_prefixes):
            stdlib_prefixes.add(dirname)

    sourceroot = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
    hgpackages = ('hgdemandimport', 'hgext', 'mercurial')
    modulesuffixes = ('.py', '.so', '.pyc', '.pyo', '.pyd')
    for libpath in sys.path:
        # We want to walk everything in sys.path that starts with something in
        # stdlib_prefixes, but not directories from the hg sources.
        if os.path.abspath(libpath).startswith(sourceroot):
            continue
        if not any(libpath.startswith(p) for p in stdlib_prefixes):
            continue
        for top, dirs, files in os.walk(libpath):
            if 'dist-packages' in top.split(os.path.sep):
                # Modules installed by the distribution are not stdlib.
                # Prune in place so os.walk does not descend into a tree
                # whose every directory would be skipped by this test.
                dirs[:] = []
                continue
            # Only descend into real packages, and never into a copy of
            # the hg packages sitting directly in libpath. The slice
            # assignment edits the list os.walk uses to pick subdirs.
            dirs[:] = [
                d
                for d in dirs
                if os.path.exists(os.path.join(top, d, '__init__.py'))
                and not (top == libpath and d in hgpackages)
            ]
            for name in files:
                if not name.endswith(modulesuffixes):
                    continue
                if name.startswith('__init__.py'):
                    # A package is named after its directory.
                    full_path = top
                else:
                    full_path = os.path.join(top, name)
                rel_path = full_path[len(libpath) + 1 :]
                yield dotted_name_of_path(rel_path)
298 300
299 301
300 302 stdlib_modules = set(list_stdlib_modules())
301 303
302 304
def imported_modules(source, modulename, f, localmods, ignore_nested=False):
    """Given the source of a file as a string, yield the names
    imported by that file.

    Args:
      source: The python source to examine as a string.
      modulename: of specified python source (may have `__init__`);
                  bytes are decoded as ASCII
      f: file name handed to ``ast.parse()`` for the parsed source
      localmods: set of locally defined module names (may have `__init__`)
      ignore_nested: If true, import statements that do not start in
                     column zero will be ignored.

    Yields:
      Absolute module names (in `localmods` form) of the locally defined
      modules imported by the given source. Imports that do not resolve
      to a local module are skipped.

    >>> f = 'foo/xxx.py'
    >>> modulename = 'foo.xxx'
    >>> localmods = {'foo.__init__': True,
    ...              'foo.foo1': True, 'foo.foo2': True,
    ...              'foo.bar.__init__': True, 'foo.bar.bar1': True,
    ...              'baz.__init__': True, 'baz.baz1': True }
    >>> # standard library (= not locally defined ones)
    >>> sorted(imported_modules(
    ...        'from stdlib1 import foo, bar; import stdlib2',
    ...        modulename, f, localmods))
    []
    >>> # relative importing
    >>> sorted(imported_modules(
    ...        'import foo1; from bar import bar1',
    ...        modulename, f, localmods))
    ['foo.bar.bar1', 'foo.foo1']
    >>> sorted(imported_modules(
    ...        'from bar.bar1 import name1, name2, name3',
    ...        modulename, f, localmods))
    ['foo.bar.bar1']
    >>> # absolute importing
    >>> sorted(imported_modules(
    ...        'from baz import baz1, name1',
    ...        modulename, f, localmods))
    ['baz.__init__', 'baz.baz1']
    >>> # mixed importing, even though it shouldn't be recommended
    >>> sorted(imported_modules(
    ...        'import stdlib, foo1, baz',
    ...        modulename, f, localmods))
    ['baz.__init__', 'foo.foo1']
    >>> # ignore_nested
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', modulename, f, localmods))
    ['foo.__init__', 'foo.bar.__init__']
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', modulename, f, localmods, ignore_nested=True))
    ['foo.__init__']
    """
    # Normalize to text up front. The self-import test at the bottom
    # compares against str names from localmods; a bytes modulename would
    # never compare equal on Python 3 and silently disable that test.
    if not isinstance(modulename, str):
        modulename = modulename.decode('ascii')
    fromlocal = fromlocalfunc(modulename, localmods)
    for node in ast.walk(ast.parse(source, f)):
        if ignore_nested and getattr(node, 'col_offset', 0) > 0:
            continue
        if isinstance(node, ast.Import):
            for alias in node.names:
                found = fromlocal(alias.name)
                if not found:
                    # this should import standard library
                    continue
                yield found[1]
        elif isinstance(node, ast.ImportFrom):
            found = fromlocal(node.module, node.level)
            if not found:
                # this should import standard library
                continue

            absname, dottedpath, hassubmod = found
            if not hassubmod:
                # "dottedpath" is not a package; must be imported
                yield dottedpath
                # examination of "node.names" should be redundant
                # e.g.: from mercurial.node import nullid, nullrev
                continue

            modnotfound = False
            prefix = absname + '.'
            for alias in node.names:
                found = fromlocal(prefix + alias.name)
                if not found:
                    # this should be a function or a property of "node.module"
                    modnotfound = True
                    continue
                yield found[1]
            if modnotfound and dottedpath != modulename:
                # "dottedpath" is a package, but imported because of non-module
                # lookup
                # specifically allow "from . import foo" from __init__.py
                yield dottedpath
400 402
401 403
def verify_import_convention(module, source, localmods):
    """Verify imports match our established coding convention.

    There are two conventions. The modern one is much more thorough and
    applies when the source uses absolute imports. The legacy one applies
    otherwise and only looks for mixed imports.

    Returns the iterable of ``(message, lineno)`` problems produced by
    the checker for the convention in effect.
    """
    tree = ast.parse(source)
    if not usingabsolute(tree):
        return verify_stdlib_on_own_line(tree)
    return verify_modern_convention(module, tree, localmods)
418 420
419 421
def verify_modern_convention(module, root, localmods, root_col_offset=0):
    """Verify a file conforms to the modern import convention rules.

    Yields one ``(message, lineno)`` tuple per violation found under
    `root`. Nested function bodies are checked recursively, with
    `root_col_offset` set to the column their statements start at.

    The rules of the modern convention are:

    * Ordering is stdlib followed by local imports. Each group is lexically
      sorted.
    * Importing multiple modules via "import X, Y" is not allowed: use
      separate import statements.
    * Importing multiple modules via "from X import ..." is allowed if using
      parenthesis and one entry per line.
    * Only 1 relative import statement per import level ("from .", "from ..")
      is allowed.
    * Relative imports from higher levels must occur before lower levels. e.g.
      "from .." must be before "from .".
    * Imports from peer packages should use relative import (e.g. do not
      "import mercurial.foo" from a "mercurial.*" module).
    * Symbols can only be imported from specific modules (see
      `allowsymbolimports`). For other modules, first import the module then
      assign the symbol to a module-level variable. In addition, these imports
      must be performed before other local imports. This rule only
      applies to import statements outside of any blocks.
    * Relative imports from the standard library are not allowed, unless that
      library is also a local module.
    * Certain modules must be aliased to alternate names to avoid aliasing
      and readability problems. See `requirealias`.
    """
    if not isinstance(module, str):
        module = module.decode('ascii')
    topmodule = module.split('.')[0]
    fromlocal = fromlocalfunc(module, localmods)

    # Name of the most recent local/non-stdlib import, if any.
    seenlocal = None
    # Whether a local/non-stdlib, non-symbol import has been seen.
    seennonsymbollocal = False
    # The last name to be imported (for sorting), and whether it was stdlib.
    lastname = None
    laststdlib = None
    # Relative import levels encountered so far.
    seenlevels = set()

    for node, newscope in walklocal(root):

        def msg(fmt, *args):
            # Pair the formatted message with the offending node's line.
            return (fmt % args, node.lineno)

        if newscope:
            # Check for local imports in function
            nested = verify_modern_convention(
                module, node, localmods, node.col_offset + 4
            )
            for problem in nested:
                yield problem
            continue

        if isinstance(node, ast.Import):
            # Sorting and ordering rules are ignored inside blocks.
            toplevel = node.col_offset == root_col_offset

            # Disallow "import foo, bar" and require separate imports
            # for each module.
            if len(node.names) > 1:
                yield msg(
                    'multiple imported names: %s',
                    ', '.join(alias.name for alias in node.names),
                )

            # Only the first name takes part in the remaining checks.
            name = node.names[0].name
            asname = node.names[0].asname

            stdlib = name in stdlib_modules

            if toplevel and lastname and name < lastname:
                if laststdlib == stdlib:
                    yield msg(
                        'imports not lexically sorted: %s < %s', name, lastname
                    )

            lastname = name
            laststdlib = stdlib

            # stdlib imports should be before local imports.
            if stdlib and seenlocal and toplevel:
                yield msg(
                    'stdlib import "%s" follows local import: %s',
                    name,
                    seenlocal,
                )

            if not stdlib:
                seenlocal = name

            # Import of sibling modules should use relative imports.
            if name.split('.')[0] == topmodule:
                yield msg('import should be relative: %s', name)

            if name in requirealias and asname != requirealias[name]:
                yield msg(
                    '%s module must be "as" aliased to %s',
                    name,
                    requirealias[name],
                )
            continue

        if not isinstance(node, ast.ImportFrom):
            continue

        toplevel = node.col_offset == root_col_offset

        # Resolve the full imported module name.
        if node.level > 0:
            fullname = '.'.join(module.split('.')[: -node.level])
            if node.module:
                fullname += '.%s' % node.module
        else:
            assert node.module
            fullname = node.module

            # An absolute import of our own top-level package should
            # have been written as a relative import.
            if fullname.split('.')[0] == topmodule:
                yield msg('import should be relative: %s', fullname)

        # __future__ is special since it needs to come first and use
        # symbol import.
        if fullname != '__future__':
            if not fullname or (
                fullname in stdlib_modules
                # allow standard 'from typing import ...' style
                and fullname.startswith('.')
                and fullname not in localmods
                and fullname + '.__init__' not in localmods
            ):
                yield msg('relative import of stdlib module')
            else:
                seenlocal = fullname

        # Direct symbol import is only allowed from certain modules and
        # must occur before non-symbol imports.
        found = fromlocal(node.module, node.level)
        if found and found[2]:  # node.module is a package
            prefix = found[0] + '.'
            # Names that resolve to sub modules are not symbols.
            imported = [
                alias.name
                for alias in node.names
                if not fromlocal(prefix + alias.name)
            ]
        else:
            imported = [alias.name for alias in node.names]
        symbols = [sym for sym in imported if sym not in directsymbols]

        if node.module and toplevel:
            if symbols and fullname not in allowsymbolimports:
                yield msg(
                    'direct symbol import %s from %s',
                    ', '.join(symbols),
                    fullname,
                )

            if symbols and seennonsymbollocal:
                yield msg(
                    'symbol import follows non-symbol import: %s', fullname
                )
        if not symbols and fullname not in stdlib_modules:
            seennonsymbollocal = True

        if not node.module:
            assert node.level

            # Only allow 1 group per level.
            if node.level in seenlevels and toplevel:
                yield msg(
                    'multiple "from %s import" statements', '.' * node.level
                )

            # Higher-level groups come before lower-level groups.
            if any(node.level > seen for seen in seenlevels):
                yield msg(
                    'higher-level import should come first: %s', fullname
                )

            seenlevels.add(node.level)

        # Entries in "from .X import ( ... )" lists must be lexically
        # sorted.
        lastentryname = None

        for alias in node.names:
            if lastentryname and alias.name < lastentryname:
                yield msg(
                    'imports from %s not lexically sorted: %s < %s',
                    fullname,
                    alias.name,
                    lastentryname,
                )

            lastentryname = alias.name

            if (
                alias.name in requirealias
                and alias.asname != requirealias[alias.name]
            ):
                yield msg(
                    '%s from %s must be "as" aliased to %s',
                    alias.name,
                    fullname,
                    requirealias[alias.name],
                )
616 618
617 619
def verify_stdlib_on_own_line(root):
    """Given some python source, verify that stdlib imports are done
    in separate statements from relative local module imports.

    Yields a ``(message, lineno)`` tuple for every ``import`` statement
    that names both stdlib and non-stdlib modules.

    >>> list(verify_stdlib_on_own_line(ast.parse('import sys, foo')))
    [('mixed imports\\n stdlib: sys\\n relative: foo', 1)]
    >>> list(verify_stdlib_on_own_line(ast.parse('import sys, os')))
    []
    >>> list(verify_stdlib_on_own_line(ast.parse('import foo, bar')))
    []
    """
    for node in ast.walk(root):
        if not isinstance(node, ast.Import):
            continue
        names = [alias.name for alias in node.names]
        stdlib = [name for name in names if name in stdlib_modules]
        relative = [name for name in names if name not in stdlib_modules]
        if stdlib and relative:
            detail = (', '.join(sorted(stdlib)), ', '.join(sorted(relative)))
            yield (
                'mixed imports\n stdlib: %s\n relative: %s' % detail,
                node.lineno,
            )
643 645
644 646
class CircularImport(Exception):
    """Raised by checkmod(); ``args[0]`` is the import path of the cycle."""

    pass


def checkmod(mod, imports):
    """Search the import graph breadth-first for a cycle through `mod`.

    `imports` maps a module name to the names it imports. Returns None
    when no chain of imports starting at `mod` leads back to `mod`.

    Raises:
      CircularImport: with the list of modules from `mod` up to the
        module that imports `mod` again.
    """
    # Shortest path length at which each module has been reached; a
    # module is only expanded again through a strictly shorter path.
    shortest = {}
    # A deque gives O(1) pops from the front of the BFS queue.
    visit = collections.deque([[mod]])
    while visit:
        path = visit.popleft()
        for i in sorted(imports.get(path[-1], [])):
            if len(path) < shortest.get(i, 1000):
                shortest[i] = len(path)
                if i in path:
                    if i == path[0]:
                        raise CircularImport(path)
                    # A loop that does not pass through `mod`: it is
                    # reported when its own members are checked.
                    continue
                visit.append(path + [i])
662 664
663 665
def rotatecycle(cycle):
    """arrange a cycle so that the lexicographically first module listed first

    The returned list starts and ends with that module, closing the loop.

    >>> rotatecycle(['foo', 'bar'])
    ['bar', 'foo', 'bar']
    """
    pivot = cycle.index(min(cycle))
    return cycle[pivot:] + cycle[:pivot] + [cycle[pivot]]
673 675
674 676
def find_cycles(imports):
    """Find cycles in an already-loaded import graph.

    All module names recorded in `imports` should be absolute one.

    Returns a set of strings, one per detected cycle, each written as
    module names joined by " -> " and rotated so the lexicographically
    first module leads.

    >>> from __future__ import print_function
    >>> imports = {'top.foo': ['top.bar', 'os.path', 'top.qux'],
    ...            'top.bar': ['top.baz', 'sys'],
    ...            'top.baz': ['top.foo'],
    ...            'top.qux': ['top.foo']}
    >>> print('\\n'.join(sorted(find_cycles(imports))))
    top.bar -> top.baz -> top.foo -> top.bar
    top.foo -> top.qux -> top.foo
    """
    found = set()
    for name in sorted(imports):
        try:
            checkmod(name, imports)
        except CircularImport as exc:
            # The exception carries the path that closed the loop.
            found.add(" -> ".join(rotatecycle(exc.args[0])))
    return found
697 699
698 700
699 701 def _cycle_sortkey(c):
700 702 return len(c), c
701 703
702 704
def embedded(f, modname, src):
    """Extract embedded python code

    Yields ``(code, modulename, filename, lineoffset)`` for each snippet
    that ``testparseutil.pyembedded()`` reports for `src`. The yielded
    module name is `modname` followed by the snippet's 1-origin starting
    line in brackets; snippets without a name are reported under the
    file name 'doctest.py'.

    >>> def _forcestr(thing):
    ...     if not isinstance(thing, str):
    ...         return thing.decode('ascii')
    ...     return thing
    >>> def test(fn, lines):
    ...     for s, m, f, l in embedded(fn, b"example", lines):
    ...         print("%s %s %d" % (_forcestr(m), _forcestr(f), l))
    ...         print(repr(_forcestr(s)))
    >>> lines = [
    ...   'comment',
    ...   ' >>> from __future__ import print_function',
    ...   " >>> ' multiline",
    ...   " ... string'",
    ...   ' ',
    ...   'comment',
    ...   ' $ cat > foo.py <<EOF',
    ...   ' > from __future__ import print_function',
    ...   ' > EOF',
    ... ]
    >>> test(b"example.t", lines)
    example[2] doctest.py 1
    "from __future__ import print_function\\n' multiline\\nstring'\\n\\n"
    example[8] foo.py 7
    'from __future__ import print_function\\n'
    """
    # NOTE(review): problems reported through this list are collected
    # but never inspected here - confirm that is intended.
    errors = []
    snippets = testparseutil.pyembedded(f, src, errors)
    for name, starts, ends, code in snippets:
        # use 'doctest.py', in order to make already existing
        # doctest above pass instantly
        filename = name or 'doctest.py'
        if not isinstance(modname, str):
            modname = modname.decode('utf8')
        label = "%s[%d]" % (modname, starts)
        # "starts" is "line number" (1-origin), but embedded() is
        # expected to return "line offset" (0-origin). Therefore, this
        # yields "starts - 1".
        yield code, label, filename, starts - 1
743 745
744 746
def sources(f, modname):
    """Yields possibly multiple sources from a filepath

    input: filepath, modulename
    yields: script(string), modulename, filepath, linenumber

    Unless `f` ends in '.t', the whole file is yielded first (read as
    bytes, line offset 0, with `modname` and `f` unchanged). Then every
    script embedded in the file is yielded. For embedded scripts, the
    modulename (UTF-8 encoded) and filepath will be different from the
    function arguments. linenumber is an offset relative to the input
    file.
    """
    if not f.endswith('.t'):
        with open(f, 'rb') as raw:
            yield raw.read(), modname, f, 0

    # Embedded scripts are looked for in every file, '.t' or not.
    #
    # Strictly speaking we should sniff for the magic header that denotes
    # Python source file encoding. But in reality we don't use anything
    # other than ASCII (mainly) and UTF-8 (in a few exceptions), so
    # simplicity is fine.
    with io.open(f, 'r', encoding='utf-8') as text:
        for script, scriptmod, scriptfile, offset in embedded(
            f, modname, text
        ):
            yield script, scriptmod.encode('utf8'), scriptfile, offset
768 770
769 771
def main(argv):
    """Check the import conventions and import cycles of the given files.

    `argv` is the command line: the program name followed by either the
    source file paths to check or a single '-', in which case the paths
    are read from stdin, one per line.

    Convention violations, syntax errors and import cycles are printed
    to stdout. Returns 1 after printing the usage message for a bad
    command line; otherwise returns True if a convention violation or an
    import cycle was found and False if not.
    """
    if len(argv) < 2 or (argv[1] == '-' and len(argv) > 2):
        prog = argv[0] if argv else sys.argv[0]
        print('Usage: %s {-|file [file] [file] ...}' % prog)
        return 1
    if argv[1] == '-':
        # Work on a copy so the caller's list is left alone.
        argv = argv[:1]
        argv.extend(l.rstrip() for l in sys.stdin.readlines())
    localmodpaths = {}
    used_imports = {}
    any_errors = False
    for source_path in argv[1:]:
        modname = dotted_name_of_path(source_path)
        localmodpaths[modname] = source_path
    localmods = populateextmods(localmodpaths)
    for localmodname, source_path in sorted(localmodpaths.items()):
        if not isinstance(localmodname, bytes):
            # This is only safe because all hg's files are ascii
            localmodname = localmodname.encode('ascii')
        for src, modname, name, line in sources(source_path, localmodname):
            # sources() hands module names back as bytes, while the
            # names imported_modules() yields are str. Key the graph by
            # str as well: otherwise find_cycles() can never match an
            # imported name to its own entry on Python 3.
            if not isinstance(modname, str):
                modname = modname.decode('utf8')
            try:
                used_imports[modname] = sorted(
                    imported_modules(
                        src, modname, name, localmods, ignore_nested=True
                    )
                )
                for error, lineno in verify_import_convention(
                    modname, src, localmods
                ):
                    any_errors = True
                    print('%s:%d: %s' % (source_path, lineno + line, error))
            except SyntaxError as e:
                # NOTE(review): a syntax error is reported but does not
                # set any_errors - confirm that is intended.
                print(
                    '%s:%d: SyntaxError: %s' % (source_path, e.lineno + line, e)
                )
    cycles = find_cycles(used_imports)
    if cycles:
        firstmods = set()
        for c in sorted(cycles, key=_cycle_sortkey):
            first = c.split()[0]
            # As a rough cut, ignore any cycle that starts with the
            # same module as some other cycle. Otherwise we see lots
            # of cycles that are effectively duplicates.
            if first in firstmods:
                continue
            print('Import cycle:', c)
            firstmods.add(first)
        any_errors = True
    return any_errors != 0


if __name__ == '__main__':
    sys.exit(int(main(sys.argv)))
General Comments 0
You need to be logged in to leave comments. Login now