##// END OF EJS Templates
tests: fix builtin module test on pypy...
Maciej Fijalkowski -
r28713:806d260c default
parent child Browse files
Show More
@@ -1,606 +1,608
1 1 #!/usr/bin/env python
2 2
3 3 from __future__ import absolute_import, print_function
4 4
5 5 import ast
6 6 import collections
7 7 import os
8 8 import sys
9 9
10 10 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
11 11 # to work when run from a virtualenv. The modules were chosen empirically
12 12 # so that the return value matches the return value without virtualenv.
13 13 import BaseHTTPServer
14 14 import zlib
15 15
# Whitelist of modules that symbols can be directly imported from.
# verify_modern_convention() compares the resolved module name of a
# "from X import name" statement against this tuple and reports a
# "direct symbol import" when symbols are pulled from any other module.
allowsymbolimports = (
    '__future__',
    'mercurial.hgweb.common',
    'mercurial.hgweb.request',
    'mercurial.i18n',
    'mercurial.node',
)
24 24
# Modules that must be aliased because they are commonly confused with
# common variables and can create aliasing and readability issues.
# Maps the imported name to the alias it has to be imported "as".
requirealias = {
    'ui': 'uimod',
}
30 30
def usingabsolute(root):
    """Tell whether the module whose AST is ``root`` uses absolute imports.

    On Python 3 this is always the case.  On Python 2 the tree has to
    contain ``from __future__ import absolute_import`` somewhere.
    """
    if sys.version_info[0] >= 3:
        return True

    return any(
        alias.name == 'absolute_import'
        for stmt in ast.walk(root)
        if isinstance(stmt, ast.ImportFrom) and stmt.module == '__future__'
        for alias in stmt.names)
44 44
def walklocal(root):
    """Yield ``(node, newscope)`` for ``root`` and its same-scope descendants.

    Nodes are produced breadth-first.  A function definition is yielded
    with ``newscope`` set to True and its body is not descended into;
    every other node (including ``root`` itself) is yielded with False.
    """
    pending = collections.deque(ast.iter_child_nodes(root))
    yield root, False
    while pending:
        current = pending.popleft()
        if isinstance(current, ast.FunctionDef):
            # A nested function opens a new scope: report it, don't enter it.
            yield current, True
        else:
            pending.extend(ast.iter_child_nodes(current))
            yield current, False
55 55
def dotted_name_of_path(path, trimpure=False):
    """Given a relative path to a source file, return its dotted module name.

    Everything after the first dot of the basename is dropped (so ``.py``,
    ``.so`` and ``.ARCH.so`` all disappear), and a trailing ``module`` is
    stripped from the basename to map ``<name>module.so`` to ``<name>``.
    A basename that is exactly ``module`` is left alone, because stripping
    it would leave an empty path component.

    If `trimpure` is true, every path component named ``pure`` is removed
    from the result.

    >>> dotted_name_of_path('mercurial/error.py')
    'mercurial.error'
    >>> dotted_name_of_path('mercurial/pure/parsers.py', trimpure=True)
    'mercurial.parsers'
    >>> dotted_name_of_path('zlibmodule.so')
    'zlib'
    >>> dotted_name_of_path('foo/module.py')
    'foo.module'
    """
    parts = path.replace(os.sep, '/').split('/')
    basename = parts[-1].split('.', 1)[0] # remove .py and .so and .ARCH.so
    if basename.endswith('module') and basename != 'module':
        basename = basename[:-len('module')]
    parts[-1] = basename
    if trimpure:
        return '.'.join(p for p in parts if p != 'pure')
    return '.'.join(parts)
73 73
def fromlocalfunc(modulename, localmods):
    """Get a function to examine which locally defined module the
    target source imports via a specified name.

    `modulename` is a `dotted_name_of_path()`-ed source file path,
    which may have `.__init__` at the end of it, of the target source.

    `localmods` is a dict (or set), of which key is an absolute
    `dotted_name_of_path()`-ed source file path of locally defined (=
    Mercurial specific) modules.

    This function assumes that module names not existing in
    `localmods` are from the Python standard library.

    This function returns the function, which takes a `name` argument
    and an optional `level` argument (the number of leading dots of an
    explicit relative import, 0 otherwise), and returns an
    `(absname, dottedpath, hassubmod)` tuple if `name` matches against a
    locally defined module. Otherwise, it returns False.

    With `level` 0, `name` is first tried relative to the package of
    `modulename` and then as an absolute name. With `level` > 0, `name`
    (which may be None for "from . import x") is resolved only against
    the package `level` steps up from `modulename`; there is no fallback
    to an absolute name.

    It is assumed that `name` doesn't have `.__init__`.

    `absname` is an absolute module name of specified `name`
    (e.g. "hgext.convert"). This can be used to compose prefix for sub
    modules or so.

    `dottedpath` is a `dotted_name_of_path()`-ed source file path
    (e.g. "hgext.convert.__init__") of `name`. This is used to look
    module up in `localmods` again.

    `hassubmod` is whether it may have sub modules under it (for
    convenience, even though this is also equivalent to "absname !=
    dottedpath")

    >>> localmods = {'foo.__init__': True, 'foo.foo1': True,
    ...              'foo.bar.__init__': True, 'foo.bar.bar1': True,
    ...              'baz.__init__': True, 'baz.baz1': True }
    >>> fromlocal = fromlocalfunc('foo.xxx', localmods)
    >>> # relative
    >>> fromlocal('foo1')
    ('foo.foo1', 'foo.foo1', False)
    >>> fromlocal('bar')
    ('foo.bar', 'foo.bar.__init__', True)
    >>> fromlocal('bar.bar1')
    ('foo.bar.bar1', 'foo.bar.bar1', False)
    >>> # absolute
    >>> fromlocal('baz')
    ('baz', 'baz.__init__', True)
    >>> fromlocal('baz.baz1')
    ('baz.baz1', 'baz.baz1', False)
    >>> # unknown = maybe standard library
    >>> fromlocal('os')
    False
    >>> fromlocal(None, 1)
    ('foo', 'foo.__init__', True)
    >>> fromlocal2 = fromlocalfunc('foo.xxx.yyy', localmods)
    >>> fromlocal2(None, 2)
    ('foo', 'foo.__init__', True)
    >>> # explicit relative import with a module name
    >>> fromlocal2('foo1', 2)
    ('foo.foo1', 'foo.foo1', False)
    >>> fromlocal2('baz', 1)
    False
    """
    parts = modulename.split('.')
    prefix = '.'.join(parts[:-1])
    if prefix:
        prefix += '.'
    def fromlocal(name, level=0):
        if level > 0:
            # Explicit relative import: the number of dots fixes the
            # package to resolve against, so only one candidate exists.
            base = parts[:-level]
            if name is not None:
                base = base + [name]
            candidates = ['.'.join(base)]
        else:
            # name is None only when relative imports are used, in which
            # case level must not be absolute.
            assert name is not None
            # Check relative name first.
            candidates = [prefix + name, name]

        for n in candidates:
            if n in localmods:
                return (n, n, False)
            dottedpath = n + '.__init__'
            if dottedpath in localmods:
                return (n, dottedpath, True)
        return False
    return fromlocal
153 153
def list_stdlib_modules():
    """Yield the names of the modules considered part of the stdlib.

    The result combines the interpreter's builtin modules, a few
    hard-coded platform-specific names, and every module file found by
    walking the entries of sys.path that live under a stdlib prefix.
    Names may be yielded more than once.

    >>> mods = set(list_stdlib_modules())
    >>> 'BaseHTTPServer' in mods
    True

    os.path isn't really a module, so it's missing:

    >>> 'os.path' in mods
    False

    sys requires special treatment, because it's baked into the
    interpreter, but it should still appear:

    >>> 'sys' in mods
    True

    >>> 'collections' in mods
    True

    >>> 'cStringIO' in mods
    True
    """
    for builtin in sys.builtin_module_names:
        yield builtin
    # These modules only exist on windows, but we should always
    # consider them stdlib.
    for winonly in ['msvcrt', '_winreg']:
        yield winonly
    yield 'builtins' # python3 only
    for unixonly in 'fcntl', 'grp', 'pwd', 'termios': # Unix only
        yield unixonly
    for pypymod in 'cPickle', 'datetime': # in Python (not C) on PyPy
        yield pypymod

    stdlib_prefixes = set([sys.prefix, sys.exec_prefix])
    # Inside a virtualenv the prefixes above miss the real stdlib
    # location, so add the directories of known stdlib modules too.
    for mod in (BaseHTTPServer, zlib):
        try:
            # Not all module objects have a __file__ attribute.
            filename = mod.__file__
        except AttributeError:
            continue
        dirname = os.path.dirname(filename)
        # A directory already below a known prefix would be redundant.
        if not any(dirname.startswith(known) for known in stdlib_prefixes):
            stdlib_prefixes.add(dirname)

    suffixes = ('.py', '.so', '.pyc', '.pyo', '.pyd')
    for libpath in sys.path:
        # Only walk sys.path entries that start with something in
        # stdlib_prefixes.
        if not any(libpath.startswith(p) for p in stdlib_prefixes):
            continue
        skip = len(libpath) + 1
        for top, dirs, files in os.walk(libpath):
            # Prune in place so os.walk only descends into packages, and
            # never into a top-level hgext/mercurial directory.
            dirs[:] = [
                d for d in dirs
                if os.path.exists(os.path.join(top, d, '__init__.py'))
                and not (top == libpath and d in ('hgext', 'mercurial'))]
            for name in files:
                if not name.endswith(suffixes):
                    continue
                if name.startswith('__init__.py'):
                    # A package is named after its directory.
                    full_path = top
                else:
                    full_path = os.path.join(top, name)
                yield dotted_name_of_path(full_path[skip:])
223 225
# Computed once at import time.  verify_modern_convention() and
# verify_stdlib_on_own_line() classify an imported name as "stdlib" by
# testing membership in this set.
stdlib_modules = set(list_stdlib_modules())
225 227
def imported_modules(source, modulename, localmods, ignore_nested=False):
    """Given the source of a file as a string, yield the names
    imported by that file.

    Args:
      source: The python source to examine as a string.
      modulename: of specified python source (may have `__init__`)
      localmods: dict of locally defined module names (may have `__init__`)
      ignore_nested: If true, import statements that do not start in
                     column zero will be ignored.

    Yields:
      The dotted path (as found in `localmods`) of each locally defined
      module imported by the given source. Imports that do not resolve
      to a local module are skipped, and a name may be yielded more than
      once.

    >>> modulename = 'foo.xxx'
    >>> localmods = {'foo.__init__': True,
    ...              'foo.foo1': True, 'foo.foo2': True,
    ...              'foo.bar.__init__': True, 'foo.bar.bar1': True,
    ...              'baz.__init__': True, 'baz.baz1': True }
    >>> # standard library (= not locally defined ones)
    >>> sorted(imported_modules(
    ...        'from stdlib1 import foo, bar; import stdlib2',
    ...        modulename, localmods))
    []
    >>> # relative importing
    >>> sorted(imported_modules(
    ...        'import foo1; from bar import bar1',
    ...        modulename, localmods))
    ['foo.bar.bar1', 'foo.foo1']
    >>> sorted(imported_modules(
    ...        'from bar.bar1 import name1, name2, name3',
    ...        modulename, localmods))
    ['foo.bar.bar1']
    >>> # absolute importing
    >>> sorted(imported_modules(
    ...        'from baz import baz1, name1',
    ...        modulename, localmods))
    ['baz.__init__', 'baz.baz1']
    >>> # mixed importing, even though it shouldn't be recommended
    >>> sorted(imported_modules(
    ...        'import stdlib, foo1, baz',
    ...        modulename, localmods))
    ['baz.__init__', 'foo.foo1']
    >>> # ignore_nested
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', modulename, localmods))
    ['foo.__init__', 'foo.bar.__init__']
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', modulename, localmods, ignore_nested=True))
    ['foo.__init__']
    """
    resolve = fromlocalfunc(modulename, localmods)
    for stmt in ast.walk(ast.parse(source)):
        if ignore_nested and getattr(stmt, 'col_offset', 0) > 0:
            continue

        if isinstance(stmt, ast.Import):
            for alias in stmt.names:
                hit = resolve(alias.name)
                # an unresolved name should be a standard library import
                if hit:
                    yield hit[1]
            continue

        if not isinstance(stmt, ast.ImportFrom):
            continue

        hit = resolve(stmt.module, stmt.level)
        if not hit:
            # this should import standard library
            continue

        absname, dottedpath, hassubmod = hit
        if not hassubmod:
            # "dottedpath" is not a package, so it is what gets imported;
            # looking at the imported names would be redundant
            # e.g.: from mercurial.node import nullid, nullrev
            yield dottedpath
            continue

        needpackage = False
        for alias in stmt.names:
            sub = resolve(absname + '.' + alias.name)
            if sub:
                yield sub[1]
            else:
                # this should be a function or a property of the package
                needpackage = True
        if needpackage:
            # "dottedpath" is a package, but imported because of non-module
            # lookup
            yield dottedpath
321 323
def verify_import_convention(module, source, localmods):
    """Check the imports of ``source`` against our coding convention.

    Two conventions exist. Sources using absolute imports are held to the
    thorough "modern" convention; all others are only checked for mixed
    stdlib/local imports (the "legacy" convention).

    Returns the iterable of problems produced by the selected checker.
    """
    tree = ast.parse(source)
    if not usingabsolute(tree):
        return verify_stdlib_on_own_line(tree)
    return verify_modern_convention(module, tree, localmods)
338 340
def verify_modern_convention(module, root, localmods, root_col_offset=0):
    """Verify a file conforms to the modern import convention rules.

    The rules of the modern convention are:

    * Ordering is stdlib followed by local imports. Each group is lexically
      sorted.
    * Importing multiple modules via "import X, Y" is not allowed: use
      separate import statements.
    * Importing multiple modules via "from X import ..." is allowed if using
      parenthesis and one entry per line.
    * Only 1 relative import statement per import level ("from .", "from ..")
      is allowed.
    * Relative imports from higher levels must occur before lower levels. e.g.
      "from .." must be before "from .".
    * Imports from peer packages should use relative import (e.g. do not
      "import mercurial.foo" from a "mercurial.*" module).
    * Symbols can only be imported from specific modules (see
      `allowsymbolimports`). For other modules, first import the module then
      assign the symbol to a module-level variable. In addition, these imports
      must be performed before other relative imports. This rule only
      applies to import statements outside of any blocks.
    * Relative imports from the standard library are not allowed.
    * Certain modules must be aliased to alternate names to avoid aliasing
      and readability problems. See `requirealias`.

    Args:
      module: dotted name of the module being checked.
      root: AST node whose scope is examined; nested function definitions
            are checked by a recursive call.
      localmods: locally defined module names, as accepted by
                 `fromlocalfunc()`.
      root_col_offset: column at which statements belonging directly to
                       `root` start. Several rules are only enforced for
                       nodes located at exactly this column.

    Yields:
      `(message, lineno)` tuples, one per violation.
    """
    topmodule = module.split('.')[0]
    fromlocal = fromlocalfunc(module, localmods)

    # Whether a local/non-stdlib import has been performed.
    seenlocal = None
    # Whether a relative, non-symbol import has been seen.
    seennonsymbolrelative = False
    # The last name to be imported (for sorting).
    lastname = None
    # Relative import levels encountered so far.
    seenlevels = set()

    for node, newscope in walklocal(root):
        # Redefined on every iteration so that it reports the line number
        # of the node currently being examined.
        def msg(fmt, *args):
            return (fmt % args, node.lineno)
        if newscope:
            # Check for local imports in function
            # (the "+ 4" assumes the function body is indented by four
            # columns relative to its "def")
            for r in verify_modern_convention(module, node, localmods,
                                              node.col_offset + 4):
                yield r
        elif isinstance(node, ast.Import):
            # Disallow "import foo, bar" and require separate imports
            # for each module.
            if len(node.names) > 1:
                yield msg('multiple imported names: %s',
                          ', '.join(n.name for n in node.names))

            # Only the first imported name takes part in the remaining
            # checks of this branch.
            name = node.names[0].name
            asname = node.names[0].asname

            # Ignore sorting rules on imports inside blocks.
            if node.col_offset == root_col_offset:
                if lastname and name < lastname:
                    yield msg('imports not lexically sorted: %s < %s',
                              name, lastname)

            lastname = name

            # stdlib imports should be before local imports.
            stdlib = name in stdlib_modules
            if stdlib and seenlocal and node.col_offset == root_col_offset:
                yield msg('stdlib import "%s" follows local import: %s',
                          name, seenlocal)

            if not stdlib:
                seenlocal = name

            # Import of sibling modules should use relative imports.
            topname = name.split('.')[0]
            if topname == topmodule:
                yield msg('import should be relative: %s', name)

            if name in requirealias and asname != requirealias[name]:
                yield msg('%s module must be "as" aliased to %s',
                          name, requirealias[name])

        elif isinstance(node, ast.ImportFrom):
            # Resolve the full imported module name.
            if node.level > 0:
                fullname = '.'.join(module.split('.')[:-node.level])
                if node.module:
                    fullname += '.%s' % node.module
            else:
                assert node.module
                fullname = node.module

                # An absolute import that names our own top-level package
                # should have been written as a relative import.
                topname = fullname.split('.')[0]
                if topname == topmodule:
                    yield msg('import should be relative: %s', fullname)

            # __future__ is special since it needs to come first and use
            # symbol import.
            # NOTE(review): the stdlib check below is not restricted to
            # node.level > 0, so it also fires for an absolute
            # "from <stdlib module> import ..." despite the wording of the
            # message -- confirm this is intended.
            if fullname != '__future__':
                if not fullname or fullname in stdlib_modules:
                    yield msg('relative import of stdlib module')
                else:
                    seenlocal = fullname

            # Direct symbol import is only allowed from certain modules and
            # must occur before non-symbol imports.
            if node.module and node.col_offset == root_col_offset:
                found = fromlocal(node.module, node.level)
                if found and found[2]: # node.module is a package
                    # Names that resolve to sub modules of the package are
                    # module imports, not symbol imports.
                    prefix = found[0] + '.'
                    symbols = [n.name for n in node.names
                               if not fromlocal(prefix + n.name)]
                else:
                    symbols = [n.name for n in node.names]

                if symbols and fullname not in allowsymbolimports:
                    yield msg('direct symbol import %s from %s',
                              ', '.join(symbols), fullname)

                if symbols and seennonsymbolrelative:
                    yield msg('symbol import follows non-symbol import: %s',
                              fullname)

            # "from . import x" / "from .. import x": a relative import
            # without a module name.
            if not node.module:
                assert node.level
                seennonsymbolrelative = True

            # Only allow 1 group per level.
            if (node.level in seenlevels
                and node.col_offset == root_col_offset):
                yield msg('multiple "from %s import" statements',
                          '.' * node.level)

            # Higher-level groups come before lower-level groups.
            if any(node.level > l for l in seenlevels):
                yield msg('higher-level import should come first: %s',
                          fullname)

            seenlevels.add(node.level)

            # Entries in "from .X import ( ... )" lists must be lexically
            # sorted.
            lastentryname = None

            for n in node.names:
                if lastentryname and n.name < lastentryname:
                    yield msg('imports from %s not lexically sorted: %s < %s',
                              fullname, n.name, lastentryname)

                lastentryname = n.name

                if n.name in requirealias and n.asname != requirealias[n.name]:
                    yield msg('%s from %s must be "as" aliased to %s',
                              n.name, fullname, requirealias[n.name])
493 495
def verify_stdlib_on_own_line(root):
    """Check that no single "import" statement of the given AST mixes
    stdlib modules with relative local modules.

    Yields a `(message, lineno)` tuple for every offending statement.

    >>> list(verify_stdlib_on_own_line(ast.parse('import sys, foo')))
    [('mixed imports\\n stdlib: sys\\n relative: foo', 1)]
    >>> list(verify_stdlib_on_own_line(ast.parse('import sys, os')))
    []
    >>> list(verify_stdlib_on_own_line(ast.parse('import foo, bar')))
    []
    """
    for stmt in ast.walk(root):
        if not isinstance(stmt, ast.Import):
            continue
        names = [alias.name for alias in stmt.names]
        stdlib = sorted(n for n in names if n in stdlib_modules)
        local = sorted(n for n in names if n not in stdlib_modules)
        if stdlib and local:
            problem = 'mixed imports\n stdlib: %s\n relative: %s' % (
                ', '.join(stdlib), ', '.join(local))
            yield (problem, stmt.lineno)
514 516
class CircularImport(Exception):
    """Raised by checkmod() when a module transitively imports itself.

    The first argument is the import path, as a list of module names
    starting at the module being checked.
    """

def checkmod(mod, imports):
    """Search the import graph for a cycle leading back to `mod`.

    `imports` maps a module name to the names it imports; modules missing
    from the mapping are treated as importing nothing.

    The graph is explored breadth-first. Each module is only expanded
    through the first (hence shortest) path reaching it, and cycles that
    do not go through `mod` are ignored.

    Raises CircularImport, carrying the path found, if `mod` can be
    reached from itself. Returns None otherwise.
    """
    # Length of the path through which each module was first reached.
    shortest = {}
    # A deque makes taking the oldest pending path O(1).
    visit = collections.deque([[mod]])
    while visit:
        path = visit.popleft()
        for i in sorted(imports.get(path[-1], [])):
            best = shortest.get(i)
            if best is not None and len(path) >= best:
                # Already reached through a path at least as short.
                continue
            shortest[i] = len(path)
            if i in path:
                if i == path[0]:
                    raise CircularImport(path)
                # A cycle that does not involve `mod`: not ours to report.
                continue
            visit.append(path + [i])
531 533
def rotatecycle(cycle):
    """Rotate a cycle so it starts at its lexicographically first module,
    and close it by repeating that module at the end.

    >>> rotatecycle(['foo', 'bar'])
    ['bar', 'foo', 'bar']
    """
    pivot = cycle.index(min(cycle))
    head, tail = cycle[pivot:], cycle[:pivot]
    return head + tail + [cycle[pivot]]
541 543
def find_cycles(imports):
    """Find cycles in an already-loaded import graph.

    All module names recorded in `imports` should be absolute one.

    Returns a set of strings, each describing one cycle as module names
    joined by " -> ", rotated so that equivalent cycles compare equal.

    >>> from __future__ import print_function
    >>> imports = {'top.foo': ['top.bar', 'os.path', 'top.qux'],
    ...            'top.bar': ['top.baz', 'sys'],
    ...            'top.baz': ['top.foo'],
    ...            'top.qux': ['top.foo']}
    >>> print('\\n'.join(sorted(find_cycles(imports))))
    top.bar -> top.baz -> top.foo -> top.bar
    top.foo -> top.qux -> top.foo
    """
    found = set()
    for name in sorted(imports):
        try:
            checkmod(name, imports)
        except CircularImport as exc:
            found.add(" -> ".join(rotatecycle(exc.args[0])))
    return found
564 566
def _cycle_sortkey(c):
    """Sort key ordering cycle descriptions by length, then by text."""
    return (len(c), c)
567 569
def main(argv):
    """Check the import conventions and import cycles of the given files.

    `argv` is the full argument vector: the program name followed by
    either a list of source file paths, or a single '-' to read the paths
    (one per line) from stdin.

    Every convention violation is printed as "path:lineno: message" and
    every reported import cycle as "Import cycle: ...".

    Returns 1 after printing a usage message if the arguments are
    invalid. Otherwise returns True if any problem was reported and
    False if not.
    """
    if len(argv) < 2 or (argv[1] == '-' and len(argv) > 2):
        prog = argv[0] if argv else '<prog>'
        print('Usage: %s {-|file [file] [file] ...}' % prog)
        return 1
    if argv[1] == '-':
        # Work on a copy so the caller's list is left untouched.
        argv = argv[:1]
        paths = (l.rstrip() for l in sys.stdin.readlines())
        # Blank lines are not file names; skip them instead of failing
        # later when trying to open ''.
        argv.extend(p for p in paths if p)
    localmods = {}
    used_imports = {}
    any_errors = False
    for source_path in argv[1:]:
        modname = dotted_name_of_path(source_path, trimpure=True)
        localmods[modname] = source_path
    for modname, source_path in sorted(localmods.items()):
        # The context manager closes the file even if reading fails.
        with open(source_path) as f:
            src = f.read()
        used_imports[modname] = sorted(
            imported_modules(src, modname, localmods, ignore_nested=True))
        for error, lineno in verify_import_convention(modname, src, localmods):
            any_errors = True
            print('%s:%d: %s' % (source_path, lineno, error))
    cycles = find_cycles(used_imports)
    if cycles:
        firstmods = set()
        for c in sorted(cycles, key=_cycle_sortkey):
            first = c.split()[0]
            # As a rough cut, ignore any cycle that starts with the
            # same module as some other cycle. Otherwise we see lots
            # of cycles that are effectively duplicates.
            if first in firstmods:
                continue
            print('Import cycle:', c)
            firstmods.add(first)
        any_errors = True
    return any_errors
604 606
# Script entry point: main() returns 1, True or False, which int() turns
# into the process exit status (non-zero when any problem was reported).
if __name__ == '__main__':
    sys.exit(int(main(sys.argv)))
General Comments 0
You need to be logged in to leave comments. Login now