##// END OF EJS Templates
import-checker: open all source files as utf-8...
Gregory Szorc -
r43733:a8454e84 stable
parent child Browse files
Show More
@@ -1,813 +1,818 b''
1 1 #!/usr/bin/env python
2 2
3 3 from __future__ import absolute_import, print_function
4 4
5 5 import ast
6 6 import collections
7 import io
7 8 import os
8 9 import sys
9 10
10 11 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
11 12 # to work when run from a virtualenv. The modules were chosen empirically
12 13 # so that the return value matches the return value without virtualenv.
13 14 if True: # disable lexical sorting checks
14 15 try:
15 16 import BaseHTTPServer as basehttpserver
16 17 except ImportError:
17 18 basehttpserver = None
18 19 import zlib
19 20
20 21 import testparseutil
21 22
# Whitelist of modules that symbols can be directly imported from, i.e.
# "from <module> import <symbol>" is accepted for these modules by
# verify_modern_convention().
allowsymbolimports = (
    '__future__',
    'bzrlib',
    'hgclient',
    'mercurial',
    'mercurial.hgweb.common',
    'mercurial.hgweb.request',
    'mercurial.i18n',
    'mercurial.interfaces',
    'mercurial.node',
    'mercurial.pycompat',
    # for revlog to re-export constant to extensions
    'mercurial.revlogutils.constants',
    'mercurial.revlogutils.flagutil',
    # for cffi modules to re-export pure functions
    'mercurial.pure.base85',
    'mercurial.pure.bdiff',
    'mercurial.pure.mpatch',
    'mercurial.pure.osutil',
    'mercurial.pure.parsers',
    # third-party imports should be directly imported
    'mercurial.thirdparty',
    'mercurial.thirdparty.attr',
    'mercurial.thirdparty.zope',
    'mercurial.thirdparty.zope.interface',
)

# Whitelist of symbols that can be directly imported, whatever module they
# are imported from.
directsymbols = ('demandimport',)

# Modules that must be aliased because they are commonly confused with
# common variables and can create aliasing and readability issues.
# Maps the module name to the alias it has to be imported as.
requirealias = {
    'ui': 'uimod',
}
58 59
59 60
def usingabsolute(root):
    """Report whether the module parsed into ``root`` uses absolute imports.

    This is always true on Python 3. On Python 2 it is true only when the
    tree contains ``from __future__ import absolute_import``.
    """
    if sys.version_info[0] >= 3:
        return True

    return any(
        alias.name == 'absolute_import'
        for stmt in ast.walk(root)
        if isinstance(stmt, ast.ImportFrom) and stmt.module == '__future__'
        for alias in stmt.names
    )
73 74
74 75
def walklocal(root):
    """Yield ``(node, newscope)`` for ``root`` and its same-scope descendants.

    The walk is breadth-first. A function definition is yielded with
    ``newscope`` set to True and its body is not descended into; every
    other node (including ``root`` itself) is yielded with False.
    """
    pending = collections.deque(ast.iter_child_nodes(root))
    yield root, False
    while pending:
        current = pending.popleft()
        if isinstance(current, ast.FunctionDef):
            # A function opens a new scope: report it, but leave its body
            # to whoever wants to walk that scope separately.
            yield current, True
            continue
        pending.extend(ast.iter_child_nodes(current))
        yield current, False
85 86
86 87
def dotted_name_of_path(path):
    """Turn a relative source file path into a dotted module name.

    Everything after the first dot of the file name is dropped, and a
    trailing ``module`` on the remaining name is removed as well.

    >>> dotted_name_of_path('mercurial/error.py')
    'mercurial.error'
    >>> dotted_name_of_path('zlibmodule.so')
    'zlib'
    """
    components = path.replace(os.sep, '/').split('/')
    # Cutting at the first dot removes .py, .so and .ARCH.so alike.
    stem = components[-1].split('.', 1)[0]
    if stem.endswith('module'):
        stem = stem[: -len('module')]
    components[-1] = stem
    return '.'.join(components)
100 101
101 102
def fromlocalfunc(modulename, localmods):
    """Build a resolver mapping imported names to locally defined modules.

    `modulename` is the `dotted_name_of_path()`-ed path of the source being
    examined (it may end with `.__init__`); bytes are decoded as ASCII.

    `localmods` is a container of absolute `dotted_name_of_path()`-ed paths
    of the locally defined (= Mercurial specific) modules. Names that are
    not found in it are assumed to come from the standard library.

    The returned function takes `name` (without `.__init__`) and an optional
    relative import `level`. It returns False when `name` does not match a
    local module, otherwise an `(absname, dottedpath, hassubmod)` tuple:

    * `absname` is the absolute module name (e.g. "hgext.convert"), usable
      as a prefix for sub modules.
    * `dottedpath` is the matching entry of `localmods`
      (e.g. "hgext.convert.__init__").
    * `hassubmod` tells whether the match is a package, which is the same
      as "absname != dottedpath".

    >>> localmods = {'foo.__init__', 'foo.foo1',
    ...              'foo.bar.__init__', 'foo.bar.bar1',
    ...              'baz.__init__', 'baz.baz1'}
    >>> fromlocal = fromlocalfunc('foo.xxx', localmods)
    >>> # relative
    >>> fromlocal('foo1')
    ('foo.foo1', 'foo.foo1', False)
    >>> fromlocal('bar')
    ('foo.bar', 'foo.bar.__init__', True)
    >>> fromlocal('bar.bar1')
    ('foo.bar.bar1', 'foo.bar.bar1', False)
    >>> # absolute
    >>> fromlocal('baz')
    ('baz', 'baz.__init__', True)
    >>> fromlocal('baz.baz1')
    ('baz.baz1', 'baz.baz1', False)
    >>> # unknown = maybe standard library
    >>> fromlocal('os')
    False
    >>> fromlocal(None, 1)
    ('foo', 'foo.__init__', True)
    >>> fromlocal('foo1', 1)
    ('foo.foo1', 'foo.foo1', False)
    >>> fromlocal2 = fromlocalfunc('foo.xxx.yyy', localmods)
    >>> fromlocal2(None, 2)
    ('foo', 'foo.__init__', True)
    >>> fromlocal2('bar2', 1)
    False
    >>> fromlocal2('bar', 2)
    ('foo.bar', 'foo.bar.__init__', True)
    """
    if not isinstance(modulename, str):
        modulename = modulename.decode('ascii')
    modparts = modulename.split('.')
    prefix = '.'.join(modparts[:-1])
    if prefix:
        prefix += '.'

    def ancestor(level):
        # Package reached by going `level` steps up from the module.
        return '.'.join(modparts[:-level])

    def fromlocal(name, level=0):
        if not name:
            # "from . import x" style: there is no module name, so the
            # import has to be relative.
            assert level > 0
            candidates = [ancestor(level)]
        elif level:
            candidates = [ancestor(level) + '.' + name]
        else:
            # Implicit relative name is tried before the absolute one.
            candidates = [prefix + name, name]

        for candidate in candidates:
            if candidate in localmods:
                return (candidate, candidate, False)
            initpath = candidate + '.__init__'
            if initpath in localmods:
                return (candidate, initpath, True)
        return False

    return fromlocal
195 196
196 197
def populateextmods(localmods):
    """Return ``localmods`` as a set, extended with C extension modules.

    For every ``mercurial.pure.X`` entry, ``mercurial.cext.X`` and
    ``mercurial.cffi._X`` are added.
    """
    pureprefix = 'mercurial.pure.'
    extended = set(localmods)
    for modname in localmods:
        if not modname.startswith(pureprefix):
            continue
        tail = modname[len(pureprefix) :]
        extended.update(('mercurial.cext.' + tail, 'mercurial.cffi._' + tail))
    return extended
206 207
207 208
def list_stdlib_modules():
    """Yield the names of the modules considered part of the stdlib.

    >>> py3 = sys.version_info[0] >= 3
    >>> mods = set(list_stdlib_modules())
    >>> 'BaseHTTPServer' in mods or py3
    True

    os.path isn't really a module, so it's missing:

    >>> 'os.path' in mods
    False

    sys requires special treatment, because it's baked into the
    interpreter, but it should still appear:

    >>> 'sys' in mods
    True

    >>> 'collections' in mods
    True

    >>> 'cStringIO' in mods or py3
    True

    >>> 'cffi' in mods
    True
    """
    for builtin in sys.builtin_module_names:
        yield builtin

    alwaysstdlib = (
        # These modules only exist on windows, but we should always
        # consider them stdlib.
        'msvcrt',
        '_winreg',
        '__builtin__',
        # python3 only
        'builtins',
        'importlib.abc',
        'importlib.machinery',
        'importlib.util',
        # Unix only
        'fcntl',
        'grp',
        'pwd',
        'termios',
        # in Python (not C) on PyPy
        'cPickle',
        'datetime',
        'cffi',
    )
    for fixed in alwaysstdlib:
        yield fixed

    stdlib_prefixes = {sys.prefix, sys.exec_prefix}
    # Inside a virtualenv the prefixes above are not enough, so add the
    # directories a couple of known stdlib modules were loaded from.
    for probe in (basehttpserver, zlib):
        if probe is None:
            continue
        try:
            # Not all module objects have a __file__ attribute.
            probefile = probe.__file__
        except AttributeError:
            continue
        probedir = os.path.dirname(probefile)
        # Skip directories already covered by a known prefix.
        if not any(probedir.startswith(p) for p in stdlib_prefixes):
            stdlib_prefixes.add(probedir)

    sourceroot = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
    hgpackages = ('hgdemandimport', 'hgext', 'mercurial')
    modsuffixes = ('.py', '.so', '.pyc', '.pyo', '.pyd')
    for libpath in sys.path:
        # We want to walk everything in sys.path that starts with something
        # in stdlib_prefixes, but not directories from the hg sources.
        if os.path.abspath(libpath).startswith(sourceroot):
            continue
        if not any(libpath.startswith(p) for p in stdlib_prefixes):
            continue
        for top, dirs, files in os.walk(libpath):
            # Prune in place so os.walk() only descends into real packages,
            # and never into hg's own packages directly below libpath.
            dirs[:] = [
                d
                for d in dirs
                if os.path.exists(os.path.join(top, d, '__init__.py'))
                and not (top == libpath and d in hgpackages)
            ]
            for name in files:
                if not name.endswith(modsuffixes):
                    continue
                if name.startswith('__init__.py'):
                    # A package is named after its directory.
                    full_path = top
                else:
                    full_path = os.path.join(top, name)
                yield dotted_name_of_path(full_path[len(libpath) + 1 :])
297 298
298 299
# Computed once at import time; used by the convention checks below to tell
# stdlib imports apart from local ones.
stdlib_modules = set(list_stdlib_modules())
300 301
301 302
def imported_modules(source, modulename, f, localmods, ignore_nested=False):
    """Yield the locally defined modules imported by a piece of source.

    Args:
      source: The python source to examine as a string.
      modulename: of specified python source (may have `__init__`)
      f: file name handed to the parser for `source`
      localmods: set of locally defined module names (may have `__init__`)
      ignore_nested: If true, import statements that do not start in
                     column zero will be ignored.

    Yields:
      Entries of `localmods` imported by the given source. Imports that do
      not resolve to a local module are assumed to be standard library and
      are skipped.

    >>> f = 'foo/xxx.py'
    >>> modulename = 'foo.xxx'
    >>> localmods = {'foo.__init__': True,
    ...              'foo.foo1': True, 'foo.foo2': True,
    ...              'foo.bar.__init__': True, 'foo.bar.bar1': True,
    ...              'baz.__init__': True, 'baz.baz1': True }
    >>> # standard library (= not locally defined ones)
    >>> sorted(imported_modules(
    ...        'from stdlib1 import foo, bar; import stdlib2',
    ...        modulename, f, localmods))
    []
    >>> # relative importing
    >>> sorted(imported_modules(
    ...        'import foo1; from bar import bar1',
    ...        modulename, f, localmods))
    ['foo.bar.bar1', 'foo.foo1']
    >>> sorted(imported_modules(
    ...        'from bar.bar1 import name1, name2, name3',
    ...        modulename, f, localmods))
    ['foo.bar.bar1']
    >>> # absolute importing
    >>> sorted(imported_modules(
    ...        'from baz import baz1, name1',
    ...        modulename, f, localmods))
    ['baz.__init__', 'baz.baz1']
    >>> # mixed importing, even though it shouldn't be recommended
    >>> sorted(imported_modules(
    ...        'import stdlib, foo1, baz',
    ...        modulename, f, localmods))
    ['baz.__init__', 'foo.foo1']
    >>> # ignore_nested
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', modulename, f, localmods))
    ['foo.__init__', 'foo.bar.__init__']
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', modulename, f, localmods, ignore_nested=True))
    ['foo.__init__']
    """
    fromlocal = fromlocalfunc(modulename, localmods)
    for node in ast.walk(ast.parse(source, f)):
        if ignore_nested and getattr(node, 'col_offset', 0) > 0:
            continue

        if isinstance(node, ast.Import):
            for alias in node.names:
                resolved = fromlocal(alias.name)
                # An unresolved name should be a standard library import.
                if resolved:
                    yield resolved[1]
            continue

        if not isinstance(node, ast.ImportFrom):
            continue

        resolved = fromlocal(node.module, node.level)
        if not resolved:
            # this should import standard library
            continue

        absname, dottedpath, hassubmod = resolved
        if not hassubmod:
            # "dottedpath" is a plain module, so the imported names can only
            # be symbols of it (e.g. from mercurial.node import nullid).
            yield dottedpath
            continue

        # "dottedpath" is a package: each imported name is either one of
        # its sub modules or something defined in its __init__.
        needspackage = False
        for alias in node.names:
            submodule = fromlocal(absname + '.' + alias.name)
            if submodule:
                yield submodule[1]
            else:
                needspackage = True
        if needspackage:
            # At least one name is looked up on the package itself.
            yield dottedpath
398 399
399 400
def verify_import_convention(module, source, localmods):
    """Check that the imports in ``source`` follow our coding convention.

    Two conventions exist. Sources using absolute imports are held to the
    thorough modern convention; everything else is only checked for stdlib
    and local imports being mixed in a single statement (legacy convention).

    Returns an iterable of ``(message, lineno)`` pairs.
    """
    root = ast.parse(source)
    if usingabsolute(root):
        return verify_modern_convention(module, root, localmods)
    return verify_stdlib_on_own_line(root)
416 417
417 418
def verify_modern_convention(module, root, localmods, root_col_offset=0):
    """Yield ``(message, lineno)`` for each breach of the modern convention.

    ``module`` is the dotted name of the module being checked (bytes are
    decoded as ASCII), ``root`` the AST node whose scope is examined and
    ``root_col_offset`` the column at which statements directly inside that
    scope start. Function bodies are checked recursively as separate scopes.

    The rules of the modern convention are:

    * Ordering is stdlib followed by local imports. Each group is lexically
      sorted.
    * Importing multiple modules via "import X, Y" is not allowed: use
      separate import statements.
    * Importing multiple modules via "from X import ..." is allowed if using
      parenthesis and one entry per line.
    * Only 1 relative import statement per import level ("from .", "from ..")
      is allowed.
    * Relative imports from higher levels must occur before lower levels. e.g.
      "from .." must be before "from .".
    * Imports from peer packages should use relative import (e.g. do not
      "import mercurial.foo" from a "mercurial.*" module).
    * Symbols can only be imported from specific modules (see
      `allowsymbolimports`). For other modules, first import the module then
      assign the symbol to a module-level variable. In addition, these imports
      must be performed before other local imports. This rule only
      applies to import statements outside of any blocks.
    * Relative imports from the standard library are not allowed, unless that
      library is also a local module.
    * Certain modules must be aliased to alternate names to avoid aliasing
      and readability problems. See `requirealias`.
    """
    if not isinstance(module, str):
        module = module.decode('ascii')
    topmodule = module.split('.')[0]
    fromlocal = fromlocalfunc(module, localmods)

    # Name of the latest local/non-stdlib import, None while there is none.
    seenlocal = None
    # Whether a local/non-stdlib, non-symbol import has been seen.
    seennonsymbollocal = False
    # The previous "import X" name and its stdlib-ness (for sorting).
    lastname = None
    laststdlib = None
    # Relative import levels encountered so far.
    seenlevels = set()

    for node, newscope in walklocal(root):

        def msg(fmt, *args):
            return (fmt % args, node.lineno)

        if newscope:
            # Check for local imports in function
            nested = verify_modern_convention(
                module, node, localmods, node.col_offset + 4
            )
            for problem in nested:
                yield problem
            continue

        if isinstance(node, ast.Import):
            # Sorting and ordering rules are ignored on imports inside
            # blocks, i.e. those not starting at the scope's own column.
            inscope = node.col_offset == root_col_offset

            # Disallow "import foo, bar" and require separate imports
            # for each module.
            if len(node.names) > 1:
                yield msg(
                    'multiple imported names: %s',
                    ', '.join(n.name for n in node.names),
                )

            first = node.names[0]
            name = first.name
            asname = first.asname
            stdlib = name in stdlib_modules

            if inscope and lastname and name < lastname:
                if laststdlib == stdlib:
                    yield msg(
                        'imports not lexically sorted: %s < %s', name, lastname
                    )

            lastname = name
            laststdlib = stdlib

            # stdlib imports should be before local imports.
            if stdlib and seenlocal and inscope:
                yield msg(
                    'stdlib import "%s" follows local import: %s',
                    name,
                    seenlocal,
                )

            if not stdlib:
                seenlocal = name

            # Import of sibling modules should use relative imports.
            if name.split('.')[0] == topmodule:
                yield msg('import should be relative: %s', name)

            if name in requirealias:
                wanted = requirealias[name]
                if asname != wanted:
                    yield msg(
                        '%s module must be "as" aliased to %s', name, wanted
                    )
            continue

        if not isinstance(node, ast.ImportFrom):
            continue

        # Resolve the full imported module name.
        if node.level > 0:
            fullname = '.'.join(module.split('.')[: -node.level])
            if node.module:
                fullname += '.%s' % node.module
        else:
            assert node.module
            fullname = node.module

            # An absolute import of a sibling module should be relative.
            if fullname.split('.')[0] == topmodule:
                yield msg('import should be relative: %s', fullname)

        # __future__ is special since it needs to come first and use
        # symbol import.
        if fullname != '__future__':
            purestdlib = (
                fullname in stdlib_modules
                and fullname not in localmods
                and fullname + '.__init__' not in localmods
            )
            if not fullname or purestdlib:
                yield msg('relative import of stdlib module')
            else:
                seenlocal = fullname

        # Direct symbol import is only allowed from certain modules and
        # must occur before non-symbol imports.
        found = fromlocal(node.module, node.level)
        if found and found[2]:  # node.module is a package
            pkgprefix = found[0] + '.'
            symbols = [
                n.name for n in node.names if not fromlocal(pkgprefix + n.name)
            ]
        else:
            symbols = [n.name for n in node.names]
        symbols = [sym for sym in symbols if sym not in directsymbols]

        if node.module and node.col_offset == root_col_offset:
            if symbols:
                if fullname not in allowsymbolimports:
                    yield msg(
                        'direct symbol import %s from %s',
                        ', '.join(symbols),
                        fullname,
                    )
                if seennonsymbollocal:
                    yield msg(
                        'symbol import follows non-symbol import: %s', fullname
                    )
            elif fullname not in stdlib_modules:
                seennonsymbollocal = True

        if not node.module:
            assert node.level

            # Only allow 1 group per level.
            if node.level in seenlevels:
                if node.col_offset == root_col_offset:
                    yield msg(
                        'multiple "from %s import" statements',
                        '.' * node.level,
                    )

            # Higher-level groups come before lower-level groups.
            if any(node.level > l for l in seenlevels):
                yield msg('higher-level import should come first: %s', fullname)

            seenlevels.add(node.level)

        # Entries in "from .X import ( ... )" lists must be lexically
        # sorted.
        previous = None
        for entry in node.names:
            if previous and entry.name < previous:
                yield msg(
                    'imports from %s not lexically sorted: %s < %s',
                    fullname,
                    entry.name,
                    previous,
                )

            previous = entry.name

            if entry.name in requirealias:
                wanted = requirealias[entry.name]
                if entry.asname != wanted:
                    yield msg(
                        '%s from %s must be "as" aliased to %s',
                        entry.name,
                        fullname,
                        wanted,
                    )
612 613
613 614
def verify_stdlib_on_own_line(root):
    """Yield ``(message, lineno)`` for imports mixing stdlib and local names.

    Every ``import a, b`` statement in the tree rooted at ``root`` must name
    either only stdlib modules or only non-stdlib modules.

    >>> list(verify_stdlib_on_own_line(ast.parse('import sys, foo')))
    [('mixed imports\\n stdlib: sys\\n relative: foo', 1)]
    >>> list(verify_stdlib_on_own_line(ast.parse('import sys, os')))
    []
    >>> list(verify_stdlib_on_own_line(ast.parse('import foo, bar')))
    []
    """
    for node in ast.walk(root):
        if not isinstance(node, ast.Import):
            continue
        stdlibnames = []
        relativenames = []
        for alias in node.names:
            if alias.name in stdlib_modules:
                stdlibnames.append(alias.name)
            else:
                relativenames.append(alias.name)
        if stdlibnames and relativenames:
            text = 'mixed imports\n stdlib: %s\n relative: %s' % (
                ', '.join(sorted(stdlibnames)),
                ', '.join(sorted(relativenames)),
            )
            yield (text, node.lineno)
639 640
640 641
class CircularImport(Exception):
    """Raised by checkmod() with the import path that forms a cycle."""
643 644
644 645
def checkmod(mod, imports):
    """Search the import graph for a cycle leading back to ``mod``.

    ``imports`` maps a module name to the names it imports; modules missing
    from the mapping are treated as importing nothing. The graph is explored
    breadth-first from ``mod``.

    Returns None when no cycle through ``mod`` is found.

    Raises CircularImport, carrying the offending path (a list starting
    with ``mod``), as soon as a module on the path imports ``mod`` again.
    """
    # Length of the shortest path at which each module was reached so far.
    shortest = {}
    # A deque keeps taking the next path O(1); list.pop(0) shifts the
    # whole list on every iteration.
    visit = collections.deque([[mod]])
    while visit:
        path = visit.popleft()
        for i in sorted(imports.get(path[-1], [])):
            # Only follow an edge when it reaches ``i`` through a strictly
            # shorter path than any seen before; this bounds the search.
            if len(path) < shortest.get(i, 1000):
                shortest[i] = len(path)
                if i in path:
                    if i == path[0]:
                        raise CircularImport(path)
                    # A cycle that does not pass through ``mod`` is left
                    # for the check starting at one of its own members.
                    continue
                visit.append(path + [i])
658 659
659 660
def rotatecycle(cycle):
    """Rotate a cycle so it starts and ends with its lexicographic minimum.

    >>> rotatecycle(['foo', 'bar'])
    ['bar', 'foo', 'bar']
    """
    smallest = min(cycle)
    pivot = cycle.index(smallest)
    head, tail = cycle[:pivot], cycle[pivot:]
    # Repeat the first module at the end to show the cycle closing.
    return tail + head + [smallest]
669 670
670 671
def find_cycles(imports):
    """Return the set of import cycles found in a loaded import graph.

    All module names recorded in `imports` should be absolute ones. Each
    cycle is rendered as "a -> b -> a", starting at its lexicographically
    first module.

    >>> from __future__ import print_function
    >>> imports = {'top.foo': ['top.bar', 'os.path', 'top.qux'],
    ...            'top.bar': ['top.baz', 'sys'],
    ...            'top.baz': ['top.foo'],
    ...            'top.qux': ['top.foo']}
    >>> print('\\n'.join(sorted(find_cycles(imports))))
    top.bar -> top.baz -> top.foo -> top.bar
    top.foo -> top.qux -> top.foo
    """
    found = set()
    for mod in sorted(imports.keys()):
        try:
            checkmod(mod, imports)
        except CircularImport as exc:
            # The exception carries the offending import path.
            found.add(" -> ".join(rotatecycle(exc.args[0])))
    return found
693 694
694 695
695 696 def _cycle_sortkey(c):
696 697 return len(c), c
697 698
698 699
def embedded(f, modname, src):
    """Yield the python code embedded in ``src``

    Each item is ``(code, modulename, filename, lineoffset)``. The module
    name is ``modname`` (decoded as UTF-8 when it is not a str) followed by
    the 1-origin starting line in brackets; the line offset is 0-origin.

    >>> def _forcestr(thing):
    ...     if not isinstance(thing, str):
    ...         return thing.decode('ascii')
    ...     return thing
    >>> def test(fn, lines):
    ...     for s, m, f, l in embedded(fn, b"example", lines):
    ...         print("%s %s %d" % (_forcestr(m), _forcestr(f), l))
    ...         print(repr(_forcestr(s)))
    >>> lines = [
    ...   'comment',
    ...   '  >>> from __future__ import print_function',
    ...   "  >>> ' multiline",
    ...   "  ... string'",
    ...   '  ',
    ...   'comment',
    ...   '  $ cat > foo.py <<EOF',
    ...   '  > from __future__ import print_function',
    ...   '  > EOF',
    ... ]
    >>> test(b"example.t", lines)
    example[2] doctest.py 1
    "from __future__ import print_function\\n' multiline\\nstring'\\n\\n"
    example[8] foo.py 7
    'from __future__ import print_function\\n'
    """
    errors = []
    for name, starts, ends, code in testparseutil.pyembedded(f, src, errors):
        if not isinstance(modname, str):
            modname = modname.decode('utf8')
        # Unnamed code gets 'doctest.py', in order to make already existing
        # doctest above pass instantly.
        filename = name or 'doctest.py'
        # "starts" is a 1-origin line number, whereas callers expect a
        # 0-origin line offset.
        yield code, "%s[%d]" % (modname, starts), filename, starts - 1
739 740
740 741
def sources(f, modname):
    """Yields possibly multiple sources from a filepath

    input: filepath, modulename
    yields: script(string), modulename, filepath, linenumber

    Unless ``f`` ends with ``.t``, the whole file is yielded first, read as
    bytes, with ``modname`` and ``f`` unchanged and a line number of 0.
    After that, every script embedded in the file is yielded.

    For embedded scripts, the modulename and filepath will be different
    from the function arguments. linenumber is an offset relative to
    the input file.
    """
    if not f.endswith('.t'):
        with open(f, 'rb') as src:
            yield src.read(), modname, f, 0
    # Both plain python files and .t tests are scanned for embedded code.
    #
    # Strictly speaking we should sniff for the magic header that denotes
    # Python source file encoding. But in reality we don't use anything
    # other than ASCII (mainly) and UTF-8 (in a few exceptions), so
    # simplicity is fine.
    with io.open(f, 'r', encoding='utf-8') as src:
        for script, embeddedname, t, line in embedded(f, modname, src):
            yield script, embeddedname.encode('utf8'), t, line
760 765
761 766
def main(argv):
    """Check import conventions and import cycles of the given files.

    ``argv`` is a ``sys.argv``-style list: the program name followed either
    by source file paths, or by a single ``-`` to read one path per line
    from stdin.

    Convention violations, syntax errors and import cycles are printed to
    stdout. Returns 1 on a usage error; otherwise a bool that is True when
    a convention violation or an import cycle was reported.
    """
    if len(argv) < 2 or (argv[1] == '-' and len(argv) > 2):
        prog = argv[0] if argv else 'import-checker'
        print('Usage: %s {-|file [file] [file] ...}' % prog)
        return 1
    if argv[1] == '-':
        argv = argv[:1]
        argv.extend(l.rstrip() for l in sys.stdin.readlines())
    localmodpaths = {}
    used_imports = {}
    any_errors = False
    for source_path in argv[1:]:
        modname = dotted_name_of_path(source_path)
        localmodpaths[modname] = source_path
    localmods = populateextmods(localmodpaths)
    for localmodname, source_path in sorted(localmodpaths.items()):
        if not isinstance(localmodname, bytes):
            # This is only safe because all hg's files are ascii
            localmodname = localmodname.encode('ascii')
        for src, modname, name, line in sources(source_path, localmodname):
            try:
                used_imports[modname] = sorted(
                    imported_modules(
                        src, modname, name, localmods, ignore_nested=True
                    )
                )
                for error, lineno in verify_import_convention(
                    modname, src, localmods
                ):
                    any_errors = True
                    # "line" is the offset of an embedded script in its file.
                    print('%s:%d: %s' % (source_path, lineno + line, error))
            except SyntaxError as e:
                # NOTE(review): a syntax error is reported but does not
                # affect the exit status - confirm this is intended.
                # e.lineno may be None, so fall back to the script offset.
                print(
                    '%s:%d: SyntaxError: %s'
                    % (source_path, (e.lineno or 0) + line, e)
                )
    cycles = find_cycles(used_imports)
    if cycles:
        firstmods = set()
        for c in sorted(cycles, key=_cycle_sortkey):
            first = c.split()[0]
            # As a rough cut, ignore any cycle that starts with the
            # same module as some other cycle. Otherwise we see lots
            # of cycles that are effectively duplicates.
            if first in firstmods:
                continue
            print('Import cycle:', c)
            firstmods.add(first)
        any_errors = True
    return any_errors != 0
810 815
811 816
if __name__ == '__main__':
    # main() returns 1 or a bool; normalize it to an integer exit status.
    exitcode = int(main(sys.argv))
    sys.exit(exitcode)
General Comments 0
You need to be logged in to leave comments. Login now