##// END OF EJS Templates
contrib: add an import checker...
Augie Fackler -
r20036:e5d51fa5 default
parent child Browse files
Show More
@@ -0,0 +1,195 b''
1 import ast
2 import os
3 import sys
4
def dotted_name_of_path(path):
    """Given a relative path to a source file, return its dotted module name.

    The file extension (``.py``, ``.so``, ...) is dropped whatever its
    length, and a path without an extension is returned untruncated.
    Components may be separated by ``/`` or by the platform's native
    separator.

    >>> dotted_name_of_path('mercurial/error.py')
    'mercurial.error'
    >>> dotted_name_of_path('mercurial/hgweb/common.py')
    'mercurial.hgweb.common'
    >>> dotted_name_of_path('contrib/README')
    'contrib.README'
    """
    # splitext() only inspects the last path component, so dots in
    # directory names are left alone and nothing is chopped off when
    # there is no extension at all.
    stem = os.path.splitext(path)[0]
    if os.sep != '/':
        # os.walk() and command lines on Windows hand us backslashes.
        stem = stem.replace(os.sep, '/')
    return '.'.join(stem.split('/'))
15
16
def list_stdlib_modules():
    """List the modules present in the stdlib.

    Names may be yielded more than once; callers are expected to
    collect them into a set.

    >>> mods = set(list_stdlib_modules())
    >>> 'BaseHTTPServer' in mods
    True

    os.path isn't really a module, so it's missing:

    >>> 'os.path' in mods
    False

    sys requires special treatment, because it's baked into the
    interpreter, but it should still appear:

    >>> 'sys' in mods
    True

    >>> 'collections' in mods
    True

    >>> 'cStringIO' in mods
    True

    Packages are listed under the dotted name of their directory:

    >>> 'json' in mods
    True
    """
    for m in sys.builtin_module_names:
        yield m
    # These modules only exist on windows, but we should always
    # consider them stdlib.
    for m in ['msvcrt', '_winreg']:
        yield m
    # These get missed too
    for m in 'ctypes', 'email':
        yield m
    yield 'builtins'  # python3 only
    for libpath in sys.path:
        # We want to walk everything in sys.path that starts with
        # either sys.prefix or sys.exec_prefix.
        if not libpath.startswith((sys.prefix, sys.exec_prefix)):
            continue
        if 'site-packages' in libpath:
            continue
        for top, dirs, files in os.walk(libpath):
            for name in files:
                if not name.endswith(('.py', '.so')):
                    continue
                full_path = os.path.join(top, name)
                if 'site-packages' in full_path:
                    continue
                rel_path = full_path[len(libpath) + 1:]
                if name == '__init__.py':
                    # A package is imported by the name of its
                    # directory, not as "pkg.__init__".  Skipping it
                    # entirely would leave stdlib packages out of the
                    # result and make them look like local modules.
                    pkg_dir = os.path.dirname(rel_path)
                    if pkg_dir:
                        yield pkg_dir.replace(os.sep, '.')
                    continue
                yield dotted_name_of_path(rel_path)
71
# Built once when the module is loaded; verify_stdlib_on_own_line() and
# check_one_mod() test membership in it to tell stdlib imports apart
# from local ones.
stdlib_modules = set(list_stdlib_modules())
73
def imported_modules(source):
    """Given the source of a file as a string, yield the names
    imported by that file.

    ``import a.b`` yields ``'a.b'`` and ``from a import b`` yields
    ``'a.b'``.  ``from . import b`` has no module part and yields just
    ``'b'``.

    >>> list(imported_modules(
    ...     'import foo ; from baz import bar; import foo.qux'))
    ['foo', 'baz.bar', 'foo.qux']
    >>> list(imported_modules('from . import foo'))
    ['foo']
    """
    for node in ast.walk(ast.parse(source)):
        if isinstance(node, ast.Import):
            for n in node.names:
                yield n.name
        elif isinstance(node, ast.ImportFrom):
            # node.module is None for "from . import x"; gluing '.' onto
            # it would raise TypeError, so use an empty prefix instead.
            prefix = node.module + '.' if node.module else ''
            for n in node.names:
                yield prefix + n.name
90
def verify_stdlib_on_own_line(source):
    """Given some python source, verify that stdlib imports are done
    in separate statements from relative local module imports.

    Yields one message per ``import`` statement that mixes names found
    in ``stdlib_modules`` with names that are not.  ``from ... import``
    statements are not examined.

    Observing this limitation is important as it works around an
    annoying lib2to3 bug in relative import rewrites:
    http://bugs.python.org/issue19510.

    >>> list(verify_stdlib_on_own_line('import sys, foo'))
    ['mixed stdlib and relative imports:\\n foo, sys']
    >>> list(verify_stdlib_on_own_line('import sys, os'))
    []
    >>> list(verify_stdlib_on_own_line('import foo, bar'))
    []
    """
    for node in ast.walk(ast.parse(source)):
        if not isinstance(node, ast.Import):
            continue
        # A set de-duplicates repeated names, as the name-keyed dict it
        # replaces did, and iterates the same way on Python 2 and 3
        # (dict.iterkeys() does not exist on Python 3).
        names = set(n.name for n in node.names)
        num_std = len([name for name in names if name in stdlib_modules])
        # Acceptable statements are all-stdlib or all-local.
        if num_std not in (len(names), 0):
            yield ('mixed stdlib and relative imports:\n %s' %
                   ', '.join(sorted(names)))
115
class CircularImport(Exception):
    """Signals that following imports led back to a module that is
    already on the path being explored.

    check_one_mod() raises it with the cycle (a list of module names)
    as its single argument.
    """
118
119
def cyclekey(names):
    """Return an order-independent key for the modules in *names*.

    Duplicates are dropped and the remaining names sorted, so every
    rotation of the same cycle produces the same tuple.
    """
    unique_names = set(names)
    return tuple(sorted(unique_names))
122
def check_one_mod(mod, imports, path=None, ignore=None):
    """Follow the imports of *mod* depth-first, looking for a cycle.

    *imports* maps a dotted module name to the names it imports.
    *path* is the chain of modules that led to *mod*, and *ignore* is a
    container of cyclekey() values for cycles that must not be reported
    again.

    Raises CircularImport, carrying the cycle as a list of module
    names, for the first cycle found whose key is not in *ignore*.
    """
    known_cycles = [] if ignore is None else ignore
    trail = ([] if path is None else path) + [mod]
    wanted = sorted(imports.get(mod, []))
    package = mod.rsplit('.', 1)[0]
    for name in wanted:
        # Anything that is not a stdlib module is taken to be a sibling
        # of mod inside the same package.
        target = name if name in stdlib_modules else package + '.' + name
        if target not in trail:
            check_one_mod(target, imports, path=trail, ignore=known_cycles)
            continue
        cycle = trail[trail.index(target):] + [target]
        if cyclekey(cycle) not in known_cycles:
            raise CircularImport(cycle)
139
140
def _rotatecycle(cycle):
    """Rotate *cycle* so its lexicographically smallest module is first.

    *cycle* is a list whose last element repeats its first; the result
    has the same shape.  This stands in for the rotatecycle() helper
    that find_cycles() used to call, which this file never defines.
    """
    lowest = min(cycle)
    idx = cycle.index(lowest)
    return cycle[idx:-1] + cycle[:idx] + [lowest]


def find_cycles(imports):
    """Find cycles in an already-loaded import graph.

    *imports* maps a dotted module name to the names it imports.
    Returns a list of strings, one per distinct cycle, each rendered as
    ``a -> b -> a``.

    >>> imports = {'top.foo': ['bar', 'os.path', 'qux'],
    ...            'top.bar': ['baz', 'sys'],
    ...            'top.baz': ['foo'],
    ...            'top.qux': ['foo']}
    >>> print('\\n'.join(sorted(find_cycles(imports))))
    top.bar -> top.baz -> top.foo -> top.bar
    top.foo -> top.qux -> top.foo
    """
    cycles = {}
    for mod in sorted(imports):
        try:
            # Passing the cycles found so far as "ignore" keeps the same
            # cycle from being raised again from another start module.
            check_one_mod(mod, imports, ignore=cycles)
        except CircularImport as e:
            cycle = e.args[0]
            cycles[cyclekey(cycle)] = ' -> '.join(_rotatecycle(cycle))
    # Return a real list on Python 3 as well, not a dict view.
    return list(cycles.values())
160
161 def _cycle_sortkey(c):
162 return len(c), c
163
def main(argv):
    """Check the files named in argv[1:] and print any import problems.

    Two kinds of problem are reported on stdout: ``import`` statements
    that mix stdlib and local modules, and import cycles among the
    given files.

    Returns 1 if no files were given.  Otherwise returns True when a
    problem was reported and False when none was, so that
    ``sys.exit(int(main(sys.argv)))`` exits non-zero on failure.
    """
    if len(argv) < 2:
        prog = argv[0] if argv else sys.argv[0]
        print('Usage: %s file [file] [file] ...' % prog)
        return 1
    used_imports = {}
    any_errors = False
    for source_path in argv[1:]:
        # "with" closes the file even if reading it raises.
        with open(source_path) as f:
            src = f.read()
        modname = dotted_name_of_path(source_path)
        used_imports[modname] = sorted(imported_modules(src))
        for error in verify_stdlib_on_own_line(src):
            any_errors = True
            print('%s %s' % (source_path, error))
    cycles = find_cycles(used_imports)
    if cycles:
        firstmods = set()
        for c in sorted(cycles, key=_cycle_sortkey):
            first = c.split()[0]
            # As a rough cut, ignore any cycle that starts with the
            # same module as some other cycle. Otherwise we see lots
            # of cycles that are effectively duplicates.
            if first in firstmods:
                continue
            print('Import cycle: %s' % c)
            firstmods.add(first)
        any_errors = True
    # Truthy means failure, matching the usage path above, which
    # returns 1.
    return any_errors
193
# Script entry point: check the files named on the command line.
# main() may return a bool, so int() turns it into a numeric exit status.
if __name__ == '__main__':
    sys.exit(int(main(sys.argv)))
General Comments 0
You need to be logged in to leave comments. Login now