##// END OF EJS Templates
import-checker: assume absolute and use modern import checker...
Gregory Szorc -
r49723:a52f5bfc default
parent child Browse files
Show More
@@ -1,824 +1,771 b''
1 1 #!/usr/bin/env python3
2 2
3 3 from __future__ import absolute_import, print_function
4 4
5 5 import ast
6 6 import collections
7 7 import io
8 8 import os
9 9 import sys
10 10
11 11 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
12 12 # to work when run from a virtualenv. The modules were chosen empirically
13 13 # so that the return value matches the return value without virtualenv.
14 14 if True: # disable lexical sorting checks
15 15 try:
16 16 import BaseHTTPServer as basehttpserver
17 17 except ImportError:
18 18 basehttpserver = None
19 19 import zlib
20 20
21 21 import testparseutil
22 22
# Modules from which individual symbols may be imported directly
# ("from X import name") without tripping the direct-symbol-import check.
allowsymbolimports = (
    (
        '__future__',
        'breezy',
        'concurrent',
        'hgclient',
        'mercurial',
        'mercurial.hgweb.common',
        'mercurial.hgweb.request',
        'mercurial.i18n',
        'mercurial.interfaces',
        'mercurial.node',
        'mercurial.pycompat',
    )
    # for revlog to re-export constant to extensions
    + (
        'mercurial.revlogutils.constants',
        'mercurial.revlogutils.flagutil',
    )
    # for cffi modules to re-export pure functions
    + (
        'mercurial.pure.base85',
        'mercurial.pure.bdiff',
        'mercurial.pure.mpatch',
        'mercurial.pure.osutil',
        'mercurial.pure.parsers',
    )
    # third-party imports should be directly imported
    + (
        'mercurial.thirdparty',
        'mercurial.thirdparty.attr',
        'mercurial.thirdparty.zope',
        'mercurial.thirdparty.zope.interface',
    )
)

# Symbol names that may always be imported directly, whatever module
# they come from.
directsymbols = ('demandimport',)

# Modules whose names collide with commonly used variable names; they
# must be imported under the alias given here to keep code readable.
requirealias = dict(ui='uimod')
60 60
61 61
def usingabsolute(root):
    """Tell whether the module rooted at AST node ``root`` uses absolute imports.

    Always True on Python 3. On older interpreters the tree is searched
    for a ``from __future__ import absolute_import`` statement.
    """
    if sys.version_info[0] >= 3:
        return True

    return any(
        alias.name == 'absolute_import'
        for node in ast.walk(root)
        if isinstance(node, ast.ImportFrom) and node.module == '__future__'
        for alias in node.names
    )
75
76
def walklocal(root):
    """Yield ``(node, newscope)`` for ``root`` and its descendants, breadth first.

    ``newscope`` is True for ``ast.FunctionDef`` nodes. Such a node is
    still yielded, but its children are not visited, so the walk never
    descends into a different function scope.
    """
    pending = collections.deque(ast.iter_child_nodes(root))
    yield root, False
    while pending:
        current = pending.popleft()
        isscope = isinstance(current, ast.FunctionDef)
        if not isscope:
            # Only keep descending while we stay in the current scope.
            pending.extend(ast.iter_child_nodes(current))
        yield current, isscope
87 72
88 73
def dotted_name_of_path(path):
    """Convert a relative source file path into a dotted module name.

    Everything after the first dot of the file name is dropped (which
    removes .py, .so and .ARCH.so suffixes), and a trailing ``module``
    on the file name is stripped as well.

    >>> dotted_name_of_path('mercurial/error.py')
    'mercurial.error'
    >>> dotted_name_of_path('zlibmodule.so')
    'zlib'
    """
    components = path.replace(os.sep, '/').split('/')
    stem = components[-1].split('.', 1)[0]
    if stem.endswith('module'):
        stem = stem[: -len('module')]
    components[-1] = stem
    return '.'.join(components)
102 87
103 88
def fromlocalfunc(modulename, localmods):
    """Build a resolver mapping imported names to locally defined modules.

    `modulename` is the `dotted_name_of_path()`-ed path of the source
    file being examined (it may end in `.__init__`); bytes are decoded
    as ASCII. `localmods` is a container of `dotted_name_of_path()`-ed
    paths of the locally defined (= Mercurial specific) modules. Names
    that are not found in `localmods` are assumed to come from the
    Python standard library.

    The returned function takes `name` (without `.__init__`) and an
    optional relative-import `level`. It returns False when `name` is
    not a local module, otherwise an `(absname, dottedpath, hassubmod)`
    tuple:

    - `absname`: absolute module name (e.g. "hgext.convert"), usable as
      a prefix for sub modules.
    - `dottedpath`: the matching entry of `localmods`
      (e.g. "hgext.convert.__init__").
    - `hassubmod`: True when the match is a package, i.e. when
      `absname != dottedpath`.

    >>> localmods = {'foo.__init__', 'foo.foo1',
    ...              'foo.bar.__init__', 'foo.bar.bar1',
    ...              'baz.__init__', 'baz.baz1'}
    >>> fromlocal = fromlocalfunc('foo.xxx', localmods)
    >>> # relative
    >>> fromlocal('foo1')
    ('foo.foo1', 'foo.foo1', False)
    >>> fromlocal('bar')
    ('foo.bar', 'foo.bar.__init__', True)
    >>> fromlocal('bar.bar1')
    ('foo.bar.bar1', 'foo.bar.bar1', False)
    >>> # absolute
    >>> fromlocal('baz')
    ('baz', 'baz.__init__', True)
    >>> fromlocal('baz.baz1')
    ('baz.baz1', 'baz.baz1', False)
    >>> # unknown = maybe standard library
    >>> fromlocal('os')
    False
    >>> fromlocal(None, 1)
    ('foo', 'foo.__init__', True)
    >>> fromlocal('foo1', 1)
    ('foo.foo1', 'foo.foo1', False)
    >>> fromlocal2 = fromlocalfunc('foo.xxx.yyy', localmods)
    >>> fromlocal2(None, 2)
    ('foo', 'foo.__init__', True)
    >>> fromlocal2('bar2', 1)
    False
    >>> fromlocal2('bar', 2)
    ('foo.bar', 'foo.bar.__init__', True)
    """
    if not isinstance(modulename, str):
        modulename = modulename.decode('ascii')
    modparts = modulename.split('.')
    # Dotted name of the enclosing package, with a trailing dot unless
    # the module lives at the top level.
    package = '.'.join(modparts[:-1])
    siblingprefix = package + '.' if package else package

    def fromlocal(name, level=0):
        if name and not level:
            # Plain "import x": a sibling of the importing module wins
            # over a top-level module of the same name.
            candidates = [siblingprefix + name, name]
        elif name:
            # "from ..pkg import x": anchored `level` packages up.
            candidates = ['.'.join(modparts[:-level]) + '.' + name]
        else:
            # "from . import x": no module name, so the import must be
            # relative.
            assert level > 0
            candidates = ['.'.join(modparts[:-level])]

        for candidate in candidates:
            if candidate in localmods:
                return (candidate, candidate, False)
            initpath = candidate + '.__init__'
            if initpath in localmods:
                return (candidate, initpath, True)
        return False

    return fromlocal
197 182
198 183
def populateextmods(localmods):
    """Return a new set of module names extended with C extension modules.

    For every ``mercurial.pure.X`` in ``localmods`` the result also
    contains ``mercurial.cext.X`` and ``mercurial.cffi._X``. The input
    is not modified.
    """
    purepfx = 'mercurial.pure.'
    extended = set(localmods)
    for modname in localmods:
        if not modname.startswith(purepfx):
            continue
        base = modname[len(purepfx) :]
        extended.update(('mercurial.cext.' + base, 'mercurial.cffi._' + base))
    return extended
208 193
209 194
def list_stdlib_modules():
    """Yield the names of the modules considered part of the stdlib.

    The result combines the interpreter's built-in modules, a fixed
    list of platform- or version-specific names, and every module file
    found by walking the ``sys.path`` entries that live under a stdlib
    prefix. Names may be yielded more than once.

    >>> py3 = sys.version_info[0] >= 3
    >>> mods = set(list_stdlib_modules())
    >>> 'BaseHTTPServer' in mods or py3
    True

    os.path isn't really a module, so it's missing:

    >>> 'os.path' in mods
    False

    sys requires special treatment, because it's baked into the
    interpreter, but it should still appear:

    >>> 'sys' in mods
    True

    >>> 'collections' in mods
    True

    >>> 'cStringIO' in mods or py3
    True

    >>> 'cffi' in mods
    True
    """
    for builtin in sys.builtin_module_names:
        yield builtin

    alwaysstdlib = (
        # These modules only exist on windows, but we should always
        # consider them stdlib.
        'msvcrt',
        '_winreg',
        '__builtin__',
        'builtins',  # python3 only
        'importlib.abc',  # python3 only
        'importlib.machinery',  # python3 only
        'importlib.util',  # python3 only
        # Unix only
        'fcntl',
        'grp',
        'pwd',
        'termios',
        # in Python (not C) on PyPy
        'cPickle',
        'datetime',
        'cffi',
    )
    for fixed in alwaysstdlib:
        yield fixed

    stdlib_prefixes = {sys.prefix, sys.exec_prefix}
    # We need to supplement the list of prefixes for the search to work
    # when run from within a virtualenv.
    for probe in (basehttpserver, zlib):
        if probe is None:
            continue
        try:
            # Not all module objects have a __file__ attribute.
            probefile = probe.__file__
        except AttributeError:
            continue
        probedir = os.path.dirname(probefile)
        # A directory already below a known prefix would be redundant.
        if not any(probedir.startswith(known) for known in stdlib_prefixes):
            stdlib_prefixes.add(probedir)

    sourceroot = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
    localpackages = ('hgdemandimport', 'hgext', 'mercurial')
    modulesuffixes = ('.py', '.so', '.pyc', '.pyo', '.pyd')
    for libpath in sys.path:
        # We want to walk everything in sys.path that starts with something in
        # stdlib_prefixes, but not directories from the hg sources.
        if os.path.abspath(libpath).startswith(sourceroot):
            continue
        if not any(libpath.startswith(p) for p in stdlib_prefixes):
            continue
        for top, dirs, files in os.walk(libpath):
            if 'dist-packages' in top.split(os.path.sep):
                continue
            # Prune in place so os.walk() only descends into real
            # packages, skipping the local packages at the top level.
            dirs[:] = [
                d
                for d in dirs
                if os.path.exists(os.path.join(top, d, '__init__.py'))
                and not (top == libpath and d in localpackages)
            ]
            for name in files:
                if not name.endswith(modulesuffixes):
                    continue
                if name.startswith('__init__.py'):
                    # A package is named after its directory.
                    full_path = top
                else:
                    full_path = os.path.join(top, name)
                rel_path = full_path[len(libpath) + 1 :]
                yield dotted_name_of_path(rel_path)


stdlib_modules = set(list_stdlib_modules())
304 289
305 290
def imported_modules(source, modulename, f, localmods, ignore_nested=False):
    """Yield the locally defined modules imported by ``source``.

    Args:
      source: The python source to examine as a string.
      modulename: of specified python source (may have `__init__`)
      f: file name handed to ``ast.parse()`` for the source.
      localmods: set of locally defined module names (may have `__init__`)
      ignore_nested: If true, import statements that do not start in
                     column zero will be ignored.

    Yields:
      The ``localmods`` entry of each imported local module; imports
      that do not resolve to a local module are skipped.

    >>> f = 'foo/xxx.py'
    >>> modulename = 'foo.xxx'
    >>> localmods = {'foo.__init__': True,
    ...              'foo.foo1': True, 'foo.foo2': True,
    ...              'foo.bar.__init__': True, 'foo.bar.bar1': True,
    ...              'baz.__init__': True, 'baz.baz1': True }
    >>> # standard library (= not locally defined ones)
    >>> sorted(imported_modules(
    ...        'from stdlib1 import foo, bar; import stdlib2',
    ...        modulename, f, localmods))
    []
    >>> # relative importing
    >>> sorted(imported_modules(
    ...        'import foo1; from bar import bar1',
    ...        modulename, f, localmods))
    ['foo.bar.bar1', 'foo.foo1']
    >>> sorted(imported_modules(
    ...        'from bar.bar1 import name1, name2, name3',
    ...        modulename, f, localmods))
    ['foo.bar.bar1']
    >>> # absolute importing
    >>> sorted(imported_modules(
    ...        'from baz import baz1, name1',
    ...        modulename, f, localmods))
    ['baz.__init__', 'baz.baz1']
    >>> # mixed importing, even though it shouldn't be recommended
    >>> sorted(imported_modules(
    ...        'import stdlib, foo1, baz',
    ...        modulename, f, localmods))
    ['baz.__init__', 'foo.foo1']
    >>> # ignore_nested
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', modulename, f, localmods))
    ['foo.__init__', 'foo.bar.__init__']
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', modulename, f, localmods, ignore_nested=True))
    ['foo.__init__']
    """
    resolve = fromlocalfunc(modulename, localmods)
    for node in ast.walk(ast.parse(source, f)):
        if ignore_nested and getattr(node, 'col_offset', 0) > 0:
            continue

        if isinstance(node, ast.Import):
            for alias in node.names:
                hit = resolve(alias.name)
                # An unresolved name should be a standard library import.
                if hit:
                    yield hit[1]
            continue

        if not isinstance(node, ast.ImportFrom):
            continue

        hit = resolve(node.module, node.level)
        if not hit:
            # this should import standard library
            continue

        absname, dottedpath, hassubmod = hit
        if not hassubmod:
            # "dottedpath" is not a package; must be imported
            # examination of "node.names" should be redundant
            # e.g.: from mercurial.node import nullid, nullrev
            yield dottedpath
            continue

        nonmodule = False
        for alias in node.names:
            sub = resolve(absname + '.' + alias.name)
            if sub:
                yield sub[1]
            else:
                # this should be a function or a property of "node.module"
                nonmodule = True
        if nonmodule and dottedpath != modulename:
            # "dottedpath" is a package, but imported because of non-module
            # lookup
            # specifically allow "from . import foo" from __init__.py
            yield dottedpath
403 388
404 389
def verify_import_convention(module, source, localmods):
    """Verify imports match our established coding convention.

    ``source`` is parsed with ``ast.parse()`` and checked against the
    modern import convention; absolute imports are assumed.

    Returns the generator produced by ``verify_modern_convention()``,
    which yields ``(message, lineno)`` tuples, one per violation.
    Raises SyntaxError if ``source`` cannot be parsed.
    """
    root = ast.parse(source)

    return verify_modern_convention(module, root, localmods)
421 395
422 396
def verify_modern_convention(module, root, localmods, root_col_offset=0):
    """Check the imports below ``root`` against the modern convention.

    ``module`` is the dotted name of the module being checked (bytes
    are decoded as ASCII), ``root`` the AST node to walk, ``localmods``
    the collection of locally defined module names, and
    ``root_col_offset`` the column at which imports of the scope being
    checked start. Yields one ``(message, lineno)`` tuple per
    violation.

    The rules of the modern convention are:

    * Ordering is stdlib followed by local imports. Each group is lexically
      sorted.
    * Importing multiple modules via "import X, Y" is not allowed: use
      separate import statements.
    * Importing multiple modules via "from X import ..." is allowed if using
      parenthesis and one entry per line.
    * Only 1 relative import statement per import level ("from .", "from ..")
      is allowed.
    * Relative imports from higher levels must occur before lower levels. e.g.
      "from .." must be before "from .".
    * Imports from peer packages should use relative import (e.g. do not
      "import mercurial.foo" from a "mercurial.*" module).
    * Symbols can only be imported from specific modules (see
      `allowsymbolimports`). For other modules, first import the module then
      assign the symbol to a module-level variable. In addition, these imports
      must be performed before other local imports. This rule only
      applies to import statements outside of any blocks.
    * Relative imports from the standard library are not allowed, unless that
      library is also a local module.
    * Certain modules must be aliased to alternate names to avoid aliasing
      and readability problems. See `requirealias`.
    """
    if not isinstance(module, str):
        module = module.decode('ascii')
    modparts = module.split('.')
    topmodule = modparts[0]
    fromlocal = fromlocalfunc(module, localmods)

    # Name of the most recent local/non-stdlib import, if any.
    seenlocal = None
    # Whether a local/non-stdlib, non-symbol import has been seen.
    seennonsymbollocal = False
    # The last name to be imported (for sorting) and whether it was stdlib.
    lastname = None
    laststdlib = None
    # Relative import levels encountered so far.
    seenlevels = set()

    for node, newscope in walklocal(root):

        def msg(fmt, *args):
            return (fmt % args, node.lineno)

        if newscope:
            # Check for local imports in function
            nested = verify_modern_convention(
                module, node, localmods, node.col_offset + 4
            )
            for problem in nested:
                yield problem
            continue

        if isinstance(node, ast.Import):
            names = [alias.name for alias in node.names]
            # Disallow "import foo, bar" and require separate imports
            # for each module.
            if len(names) > 1:
                yield msg('multiple imported names: %s', ', '.join(names))

            name = names[0]
            asname = node.names[0].asname
            stdlib = name in stdlib_modules
            # Ordering rules are ignored for imports inside blocks.
            toplevel = node.col_offset == root_col_offset

            if (
                toplevel
                and lastname
                and name < lastname
                and laststdlib == stdlib
            ):
                yield msg(
                    'imports not lexically sorted: %s < %s', name, lastname
                )

            lastname = name
            laststdlib = stdlib

            # stdlib imports should be before local imports.
            if stdlib and seenlocal and toplevel:
                yield msg(
                    'stdlib import "%s" follows local import: %s',
                    name,
                    seenlocal,
                )

            if not stdlib:
                seenlocal = name

            # Import of sibling modules should use relative imports.
            if name.split('.')[0] == topmodule:
                yield msg('import should be relative: %s', name)

            if name in requirealias and asname != requirealias[name]:
                yield msg(
                    '%s module must be "as" aliased to %s',
                    name,
                    requirealias[name],
                )
            continue

        if not isinstance(node, ast.ImportFrom):
            continue

        toplevel = node.col_offset == root_col_offset

        # Resolve the full imported module name.
        if node.level > 0:
            fullname = '.'.join(modparts[: -node.level])
            if node.module:
                fullname += '.%s' % node.module
        else:
            assert node.module
            fullname = node.module

            # An absolute import of a sibling should have been relative.
            if fullname.split('.')[0] == topmodule:
                yield msg('import should be relative: %s', fullname)

        # __future__ is special since it needs to come first and use
        # symbol import.
        if fullname != '__future__':
            if not fullname or (
                fullname in stdlib_modules
                # allow standard 'from typing import ...' style
                and fullname.startswith('.')
                and fullname not in localmods
                and fullname + '.__init__' not in localmods
            ):
                yield msg('relative import of stdlib module')
            else:
                seenlocal = fullname

        # Direct symbol import is only allowed from certain modules and
        # must occur before non-symbol imports.
        found = fromlocal(node.module, node.level)
        if found and found[2]:  # node.module is a package
            pkgprefix = found[0] + '.'
            candidates = (
                alias.name
                for alias in node.names
                if not fromlocal(pkgprefix + alias.name)
            )
        else:
            candidates = (alias.name for alias in node.names)
        symbols = [sym for sym in candidates if sym not in directsymbols]

        if node.module and toplevel:
            if symbols and fullname not in allowsymbolimports:
                yield msg(
                    'direct symbol import %s from %s',
                    ', '.join(symbols),
                    fullname,
                )

            if symbols and seennonsymbollocal:
                yield msg(
                    'symbol import follows non-symbol import: %s', fullname
                )
            if not symbols and fullname not in stdlib_modules:
                seennonsymbollocal = True

        if not node.module:
            assert node.level

            # Only allow 1 group per level.
            if node.level in seenlevels and toplevel:
                yield msg(
                    'multiple "from %s import" statements', '.' * node.level
                )

            # Higher-level groups come before lower-level groups.
            if any(seen < node.level for seen in seenlevels):
                yield msg(
                    'higher-level import should come first: %s', fullname
                )

            seenlevels.add(node.level)

        # Entries in "from .X import ( ... )" lists must be lexically
        # sorted.
        preventry = None
        for alias in node.names:
            entry = alias.name
            if preventry and entry < preventry:
                yield msg(
                    'imports from %s not lexically sorted: %s < %s',
                    fullname,
                    entry,
                    preventry,
                )

            preventry = entry

            if entry in requirealias and alias.asname != requirealias[entry]:
                yield msg(
                    '%s from %s must be "as" aliased to %s',
                    entry,
                    fullname,
                    requirealias[entry],
                )
619 593
620 594
def verify_stdlib_on_own_line(root):
    """Report ``import`` statements mixing stdlib and non-stdlib modules.

    Walks the AST below ``root`` and yields a ``(message, lineno)``
    tuple for every ``import a, b`` statement naming at least one
    module from ``stdlib_modules`` and at least one module outside it.

    >>> list(verify_stdlib_on_own_line(ast.parse('import sys, foo')))
    [('mixed imports\\n stdlib: sys\\n relative: foo', 1)]
    >>> list(verify_stdlib_on_own_line(ast.parse('import sys, os')))
    []
    >>> list(verify_stdlib_on_own_line(ast.parse('import foo, bar')))
    []
    """
    for node in ast.walk(root):
        if not isinstance(node, ast.Import):
            continue
        stdlibnames = []
        othernames = []
        for alias in node.names:
            if alias.name in stdlib_modules:
                stdlibnames.append(alias.name)
            else:
                othernames.append(alias.name)
        if stdlibnames and othernames:
            report = 'mixed imports\n stdlib: %s\n relative: %s' % (
                ', '.join(sorted(stdlibnames)),
                ', '.join(sorted(othernames)),
            )
            yield (report, node.lineno)
646
647
class CircularImport(Exception):
    """Raised by checkmod() when a module transitively imports itself.

    ``args[0]`` is the import path (a list of module names) that starts
    at the offending module and whose last entry imports it again.
    """


def checkmod(mod, imports):
    """Raise CircularImport if ``mod`` can reach itself through ``imports``.

    ``imports`` maps a module name to the names of the modules it
    imports; modules without an entry are treated as importing
    nothing. The graph is searched breadth first, so the path carried
    by the exception is a shortest cycle through ``mod``. Cycles that
    do not pass through ``mod`` are ignored. Returns None when no cycle
    is found.
    """
    # Breadth-first order means the first time a module is reached is
    # also via a shortest path, so a plain "already reached" set is
    # enough to prune longer routes.
    reached = set()
    pending = collections.deque([[mod]])
    while pending:
        path = pending.popleft()
        for dep in sorted(imports.get(path[-1], [])):
            if dep in reached:
                continue
            reached.add(dep)
            if dep in path:
                if dep == path[0]:
                    raise CircularImport(path)
                # A cycle that does not involve ``mod``; not ours to report.
                continue
            pending.append(path + [dep])
665 612
666 613
def rotatecycle(cycle):
    """Rotate ``cycle`` so its lexicographically first module leads.

    The leading module is repeated at the end to close the loop.

    >>> rotatecycle(['foo', 'bar'])
    ['bar', 'foo', 'bar']
    """
    first = min(cycle)
    pivot = cycle.index(first)
    head, tail = cycle[pivot:], cycle[:pivot]
    return head + tail + [first]
676 623
677 624
def find_cycles(imports):
    """Return the set of import cycles found in an import graph.

    ``imports`` maps absolute module names to the absolute names of
    the modules they import. Each cycle is rendered as a
    ``"a -> b -> a"`` string that starts at its lexicographically
    first module.

    >>> from __future__ import print_function
    >>> imports = {'top.foo': ['top.bar', 'os.path', 'top.qux'],
    ...            'top.bar': ['top.baz', 'sys'],
    ...            'top.baz': ['top.foo'],
    ...            'top.qux': ['top.foo']}
    >>> print('\\n'.join(sorted(find_cycles(imports))))
    top.bar -> top.baz -> top.foo -> top.bar
    top.foo -> top.qux -> top.foo
    """
    found = set()
    for modname in sorted(imports):
        try:
            checkmod(modname, imports)
        except CircularImport as exc:
            # The exception's first argument is the path of the cycle.
            found.add(" -> ".join(rotatecycle(exc.args[0])))
    return found
700 647
701 648
702 649 def _cycle_sortkey(c):
703 650 return len(c), c
704 651
705 652
def embedded(f, modname, src):
    """Extract embedded python code

    ``f`` and ``src`` are handed to ``testparseutil.pyembedded()``
    together with a scratch error list. For every code fragment it
    reports, this generator yields a 4-tuple of:

    - the fragment's code,
    - a module name of the form ``"<modname>[<starts>]"`` (a bytes
      ``modname`` is decoded as UTF-8),
    - the fragment's file name, or ``'doctest.py'`` when it has none,
    - the fragment's 0-origin line offset (``starts - 1``).
    """
    errors = []
    for name, starts, ends, code in testparseutil.pyembedded(f, src, errors):
        # Nameless fragments are labelled 'doctest.py'.
        label = name or 'doctest.py'
        if not isinstance(modname, str):
            modname = modname.decode('utf8')
        # "starts" is "line number" (1-origin), but embedded() is
        # expected to return "line offset" (0-origin). Therefore, this
        # yields "starts - 1".
        offset = starts - 1
        yield code, "%s[%d]" % (modname, starts), label, offset
746 693
747 694
def sources(f, modname):
    """Yields possibly multiple sources from a filepath

    input: filepath, modulename
    yields: script(string), modulename, filepath, linenumber

    Unless ``f`` ends in ``.t``, the whole file is yielded first (read
    as bytes, with line offset 0). After that, every python fragment
    that ``embedded()`` finds in the file is yielded, with its module
    name encoded as UTF-8.

    For embedded scripts, the modulename and filepath will be different
    from the function arguments. linenumber is an offset relative to
    the input file.
    """
    if not f.endswith('.t'):
        # Read everything up front so the handle is closed before the
        # consumer gets control at the yield.
        with open(f, 'rb') as src:
            wholefile = src.read()
        yield wholefile, modname, f, 0

    # Strictly speaking we should sniff for the magic header that denotes
    # Python source file encoding.  But in reality we don't use anything
    # other than ASCII (mainly) and UTF-8 (in a few exceptions), so
    # simplicity is fine.
    with io.open(f, 'r', encoding='utf-8') as src:
        for script, embeddedname, t, line in embedded(f, modname, src):
            yield script, embeddedname.encode('utf8'), t, line
771 718
772 719
def main(argv):
    """Check the import conventions of the files named on the command line.

    ``argv[1:]`` lists the source files to check. A single ``-``
    argument means the file names are read from stdin, one per line.

    Convention violations, syntax errors and import cycles are printed
    to stdout. Returns 1 on a usage error; otherwise returns True when
    a convention violation or an import cycle was found and False when
    none was.
    """
    if len(argv) < 2 or (argv[1] == '-' and len(argv) > 2):
        prog = argv[0] if argv else 'import-checker.py'
        print('Usage: %s {-|file [file] [file] ...}' % prog)
        return 1
    if argv[1] == '-':
        argv = argv[:1]
        argv.extend(l.rstrip() for l in sys.stdin.readlines())
    localmodpaths = {}
    used_imports = {}
    any_errors = False
    for source_path in argv[1:]:
        modname = dotted_name_of_path(source_path)
        localmodpaths[modname] = source_path
    localmods = populateextmods(localmodpaths)
    for localmodname, source_path in sorted(localmodpaths.items()):
        if not isinstance(localmodname, bytes):
            # This is only safe because all hg's files are ascii
            localmodname = localmodname.encode('ascii')
        for src, modname, name, line in sources(source_path, localmodname):
            try:
                used_imports[modname] = sorted(
                    imported_modules(
                        src, modname, name, localmods, ignore_nested=True
                    )
                )
                for error, lineno in verify_import_convention(
                    modname, src, localmods
                ):
                    any_errors = True
                    print('%s:%d: %s' % (source_path, lineno + line, error))
            except SyntaxError as e:
                # NOTE(review): a syntax error is reported but does not
                # affect the exit status -- confirm this is intended.
                # SyntaxError.lineno can be None, so fall back to 0.
                print(
                    '%s:%d: SyntaxError: %s'
                    % (source_path, (e.lineno or 0) + line, e)
                )
    cycles = find_cycles(used_imports)
    if cycles:
        firstmods = set()
        for c in sorted(cycles, key=_cycle_sortkey):
            first = c.split()[0]
            # As a rough cut, ignore any cycle that starts with the
            # same module as some other cycle. Otherwise we see lots
            # of cycles that are effectively duplicates.
            if first in firstmods:
                continue
            print('Import cycle:', c)
            firstmods.add(first)
        any_errors = True
    return any_errors


if __name__ == '__main__':
    sys.exit(int(main(sys.argv)))
General Comments 0
You need to be logged in to leave comments. Login now