##// END OF EJS Templates
import-checker: ignore nested imports
Augie Fackler -
r20037:957b4337 default
parent child Browse files
Show More
@@ -1,195 +1,212
1 1 import ast
2 2 import os
3 3 import sys
4 4
def dotted_name_of_path(path):
    """Convert a relative source-file path into a dotted module name.

    The path is split on '/' and the last three characters of the final
    component (expected to be the '.py' suffix) are dropped before the
    pieces are re-joined with dots.

    >>> dotted_name_of_path('mercurial/error.py')
    'mercurial.error'
    """
    components = path.split('/')
    filename = components.pop()
    # Drop the three-character extension ('.py') from the file name.
    components.append(filename[:-3])
    return '.'.join(components)
15 15
16 16
def list_stdlib_modules():
    """Yield the names of the modules that make up the stdlib.

    The result combines the interpreter's built-in modules, a few
    hard-coded names, and every '.py' / '.so' file found below the
    sys.path entries that live under sys.prefix or sys.exec_prefix
    (anything inside 'site-packages' is skipped).

    >>> mods = set(list_stdlib_modules())
    >>> 'BaseHTTPServer' in mods
    True

    os.path isn't really a module, so it's missing:

    >>> 'os.path' in mods
    False

    sys requires special treatment, because it's baked into the
    interpreter, but it should still appear:

    >>> 'sys' in mods
    True

    >>> 'collections' in mods
    True

    >>> 'cStringIO' in mods
    True
    """
    for modname in sys.builtin_module_names:
        yield modname
    hardcoded = (
        # These modules only exist on windows, but we should always
        # consider them stdlib.
        'msvcrt', '_winreg',
        # These get missed by the directory walk below.
        'ctypes', 'email',
        # python3 only
        'builtins',
    )
    for modname in hardcoded:
        yield modname

    # Only sys.path entries below one of these roots are walked.
    install_roots = (sys.prefix, sys.exec_prefix)
    module_suffixes = ('.py', '.so')
    for libpath in sys.path:
        if not libpath.startswith(install_roots):
            continue
        if 'site-packages' in libpath:
            continue
        for top, _dirs, files in os.walk(libpath):
            for name in files:
                if name == '__init__.py' or not name.endswith(module_suffixes):
                    continue
                full_path = os.path.join(top, name)
                if 'site-packages' not in full_path:
                    # Strip the libpath prefix plus the separator after it.
                    yield dotted_name_of_path(full_path[len(libpath) + 1:])
71 71
# Names of all stdlib modules, collected once when this file is loaded so
# that the checks below can use plain set membership tests.
stdlib_modules = set()
stdlib_modules.update(list_stdlib_modules())
73 73
def imported_modules(source, ignore_nested=False):
    """Given the source of a file as a string, yield the names
    imported by that file.

    Args:
      source: The python source to examine as a string.
      ignore_nested: If true, import statements that do not start in
                     column zero will be ignored.

    Returns:
      A generator of the module names imported by the given source.
      'import a.b' yields 'a.b'; 'from a import b' yields 'a.b'; a
      relative 'from . import b' (which has no module part) yields
      just 'b'.

    >>> sorted(imported_modules(
    ...         'import foo ; from baz import bar; import foo.qux'))
    ['baz.bar', 'foo', 'foo.qux']
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', ignore_nested=True))
    ['foo']
    >>> sorted(imported_modules('from . import foo'))
    ['foo']
    """
    for node in ast.walk(ast.parse(source)):
        # Nodes without a col_offset (e.g. the Module node) are never
        # treated as nested.
        if ignore_nested and getattr(node, 'col_offset', 0) > 0:
            continue
        if isinstance(node, ast.Import):
            for n in node.names:
                yield n.name
        elif isinstance(node, ast.ImportFrom):
            # node.module is None for 'from . import x'; concatenating
            # it with '.' would raise TypeError, so use no prefix then.
            prefix = node.module + '.' if node.module else ''
            for n in node.names:
                yield prefix + n.name
90 106
def verify_stdlib_on_own_line(source):
    """Yield an error for each import statement that mixes stdlib
    modules with relative local modules.

    Keeping the two kinds of import in separate statements matters
    because it works around an annoying lib2to3 bug in relative import
    rewrites: http://bugs.python.org/issue19510.

    >>> list(verify_stdlib_on_own_line('import sys, foo'))
    ['mixed stdlib and relative imports:\\n foo, sys']
    >>> list(verify_stdlib_on_own_line('import sys, os'))
    []
    >>> list(verify_stdlib_on_own_line('import foo, bar'))
    []
    """
    for node in ast.walk(ast.parse(source)):
        if not isinstance(node, ast.Import):
            continue
        # A set, so a name repeated within one statement counts once.
        names = set(alias.name for alias in node.names)
        stdlib_names = [name for name in names if name in stdlib_modules]
        # Only a strict mix is an error: some, but not all, from stdlib.
        if stdlib_names and len(stdlib_names) != len(names):
            yield ('mixed stdlib and relative imports:\n %s' %
                   ', '.join(sorted(names)))
115 131
class CircularImport(Exception):
    """Raised by check_one_mod when it runs into an import cycle.

    The first exception argument is the cycle, as a list of module names.
    """
118 134
119 135
def cyclekey(names):
    """Return a hashable key identifying a cycle regardless of order.

    Duplicate names are dropped and the rest sorted, so the same set of
    modules always maps to the same tuple.
    """
    unique_names = set(names)
    return tuple(sorted(unique_names))
122 138
def check_one_mod(mod, imports, path=None, ignore=None):
    """Walk the import graph depth-first from mod, raising on a cycle.

    Args:
      mod: dotted name of the module to start from.
      imports: dict mapping dotted module names to the names they import.
      path: modules already visited on the current walk (default: none).
      ignore: container of cyclekey() values; cycles whose key is in it
              are skipped instead of raised (default: none).

    Raises:
      CircularImport: when a module already on the current walk is
        imported again; its argument is the cycle as a list of module
        names that starts and ends with the same module.
    """
    trail = ([] if path is None else path) + [mod]
    known_cycles = [] if ignore is None else ignore
    # Names that are not stdlib modules are resolved as siblings of mod.
    package = mod.rsplit('.', 1)[0]
    for name in sorted(imports.get(mod, [])):
        target = name if name in stdlib_modules else package + '.' + name
        if target not in trail:
            check_one_mod(target, imports, path=trail, ignore=known_cycles)
            continue
        cycle = trail[trail.index(target):] + [target]
        if cyclekey(cycle) not in known_cycles:
            raise CircularImport(cycle)
139 155
140 156
def find_cycles(imports):
    """Find cycles in an already-loaded import graph.

    Args:
      imports: dict mapping dotted module names to the names they import.

    Returns:
      A list with one ' -> '-joined string per distinct cycle, where
      cycles are considered the same when cyclekey() gives the same key.

    >>> imports = {'top.foo': ['bar', 'os.path', 'qux'],
    ...            'top.bar': ['baz', 'sys'],
    ...            'top.baz': ['foo'],
    ...            'top.qux': ['foo']}
    >>> print('\\n'.join(sorted(find_cycles(imports))))
    top.bar -> top.baz -> top.foo -> top.bar
    top.foo -> top.qux -> top.foo
    """
    cycles = {}
    # Iterating the dict directly (instead of iterkeys()) and using
    # 'except ... as' keeps this valid on both Python 2.6+ and Python 3.
    for mod in sorted(imports):
        try:
            # Passing the dict of cycles found so far makes check_one_mod
            # skip them and go on to report the next new one.
            check_one_mod(mod, imports, ignore=cycles)
        except CircularImport as e:
            cycle = e.args[0]
            # NOTE(review): rotatecycle is not defined in the visible part
            # of this file; judging by the doctest above it presumably
            # rotates the cycle so the lowest-sorting module comes first.
            # Confirm that it exists.
            cycles[cyclekey(cycle)] = ' -> '.join(rotatecycle(cycle))
    return list(cycles.values())
160 176
def _cycle_sortkey(c):
    """Sort key that orders cycle strings by length, then by content."""
    length = len(c)
    return (length, c)
163 179
def main(argv):
    """Check the given source files for import problems and report them.

    Each file is checked for import statements that mix stdlib and
    relative imports, and the top-level imports of all files together
    are checked for import cycles. Problems are printed to stdout.

    Args:
      argv: command-line style list; argv[0] is the program name and the
            remaining entries are paths of python source files.

    Returns:
      1 if no files were given. Otherwise True if any problem was
      reported and False if none was, so that int() of the result can
      be used as the process exit status (non-zero meaning failure).
    """
    # Every print below takes one pre-formatted string, so it produces
    # the same output as a statement (Python 2) or a function (Python 3).
    if len(argv) < 2:
        prog = argv[0] if argv else sys.argv[0]
        print('Usage: %s file [file] [file] ...' % prog)
        return 1
    used_imports = {}
    any_errors = False
    for source_path in argv[1:]:
        # The context manager closes the file even if reading fails.
        with open(source_path) as f:
            src = f.read()
        modname = dotted_name_of_path(source_path)
        used_imports[modname] = sorted(
            imported_modules(src, ignore_nested=True))
        for error in verify_stdlib_on_own_line(src):
            any_errors = True
            print('%s %s' % (source_path, error))
    cycles = find_cycles(used_imports)
    if cycles:
        firstmods = set()
        for c in sorted(cycles, key=_cycle_sortkey):
            first = c.split()[0]
            # As a rough cut, ignore any cycle that starts with the
            # same module as some other cycle. Otherwise we see lots
            # of cycles that are effectively duplicates.
            if first in firstmods:
                continue
            print('Import cycle: %s' % c)
            firstmods.add(first)
        any_errors = True
    # Truthy (exit status 1) exactly when a problem was reported.
    return any_errors
193 210
if __name__ == '__main__':
    # main() may return a bool, so coerce it to an int exit status.
    exit_status = int(main(sys.argv))
    sys.exit(exit_status)
General Comments 0
You need to be logged in to leave comments. Login now