##// END OF EJS Templates
import-checker: drop duplicate element from cycle...
Matt Mackall -
r24488:4b3fc460 default
parent child Browse files
Show More
@@ -1,251 +1,251
1 1 import ast
2 2 import os
3 3 import sys
4 4
5 5 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
6 6 # to work when run from a virtualenv. The modules were chosen empirically
7 7 # so that the return value matches the return value without virtualenv.
8 8 import BaseHTTPServer
9 9 import zlib
10 10
def dotted_name_of_path(path, trimpure=False):
    """Convert a relative path to a source file into a dotted module name.

    Everything after the first dot of the file name is dropped, and so is
    a trailing ``module`` on the file name. When ``trimpure`` is true,
    path components named ``pure`` are left out of the result.

    >>> dotted_name_of_path('mercurial/error.py')
    'mercurial.error'
    >>> dotted_name_of_path('mercurial/pure/parsers.py', trimpure=True)
    'mercurial.parsers'
    >>> dotted_name_of_path('zlibmodule.so')
    'zlib'
    """
    components = path.split('/')
    # Cutting at the first dot removes .py, .so and .ARCH.so alike.
    leaf = components.pop().split('.', 1)[0]
    suffix = 'module'
    if leaf.endswith(suffix):
        leaf = leaf[:-len(suffix)]
    components.append(leaf)
    if trimpure:
        components = [c for c in components if c != 'pure']
    return '.'.join(components)
28 28
29 29
def list_stdlib_modules():
    """Yield the names of the modules present in the stdlib.

    Names are produced lazily and may repeat; callers that need a
    unique collection should wrap the result in a set.

    >>> mods = set(list_stdlib_modules())
    >>> 'BaseHTTPServer' in mods
    True

    os.path isn't really a module, so it's missing:

    >>> 'os.path' in mods
    False

    sys requires special treatment, because it's baked into the
    interpreter, but it should still appear:

    >>> 'sys' in mods
    True

    >>> 'collections' in mods
    True

    >>> 'cStringIO' in mods
    True
    """
    # Modules compiled into the interpreter come first.
    for builtin in sys.builtin_module_names:
        yield builtin
    # These modules only exist on windows, but we should always
    # consider them stdlib.
    yield 'msvcrt'
    yield '_winreg'
    # These get missed by the directory walk below.
    yield 'ctypes'
    yield 'email'
    yield 'builtins' # python3 only

    roots = set([sys.prefix, sys.exec_prefix])
    # Supplement the prefixes with the directories of a few known stdlib
    # modules so the search also works from within a virtualenv.
    for probe in (BaseHTTPServer, zlib):
        try:
            # Not all module objects have a __file__ attribute.
            location = probe.__file__
        except AttributeError:
            continue
        folder = os.path.dirname(location)
        # A directory already below a known prefix would be redundant.
        # check-code suppressed because the ast module used by this
        # script implies the availability of any().
        if not any(folder.startswith(r) for r in roots): # no-py24
            roots.add(folder)

    for libpath in sys.path:
        # Only walk sys.path entries located under one of the stdlib
        # roots, and never third-party site-packages directories.
        under_stdlib = any(libpath.startswith(r) for r in roots) # no-py24
        if not under_stdlib or 'site-packages' in libpath:
            continue
        for top, dirs, files in os.walk(libpath):
            for filename in files:
                if filename == '__init__.py':
                    continue
                if not filename.endswith(('.py', '.so')):
                    continue
                full_path = os.path.join(top, filename)
                if 'site-packages' in full_path:
                    continue
                # Strip "<libpath>/" to get the path relative to libpath.
                yield dotted_name_of_path(full_path[len(libpath) + 1:])
101 101
# Computed once when the module is loaded; verify_stdlib_on_own_line() and
# check_one_mod() consult it to tell stdlib imports from other imports.
stdlib_modules = set(list_stdlib_modules())
103 103
def imported_modules(source, ignore_nested=False):
    """Given the source of a file as a string, yield the names
    imported by that file.

    Args:
      source: The python source to examine as a string.
      ignore_nested: If true, import statements that do not start in
                     column zero will be ignored.

    Yields:
      One dotted name per imported name: ``import a.b`` gives ``a.b``
      and ``from a import b`` gives ``a.b``. A ``from . import b``
      statement has no module part, so it gives just ``b``.

    >>> sorted(imported_modules(
    ...     'import foo ; from baz import bar; import foo.qux'))
    ['baz.bar', 'foo', 'foo.qux']
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', ignore_nested=True))
    ['foo']
    >>> sorted(imported_modules('from . import foo'))
    ['foo']
    """
    for node in ast.walk(ast.parse(source)):
        # Nodes without a col_offset (such as the module itself) count as
        # column zero and are never skipped.
        if ignore_nested and getattr(node, 'col_offset', 0) > 0:
            continue
        if isinstance(node, ast.Import):
            for n in node.names:
                yield n.name
        elif isinstance(node, ast.ImportFrom):
            # node.module is None for "from . import x"; joining it to
            # '.' unconditionally would raise TypeError.
            if node.module:
                prefix = node.module + '.'
            else:
                prefix = ''
            for n in node.names:
                yield prefix + n.name
136 136
def verify_stdlib_on_own_line(source):
    """Check that no single import statement in the given python source
    mixes stdlib modules with relative local modules.

    One message is yielded per offending ``import a, b`` statement.

    Observing this limitation is important as it works around an
    annoying lib2to3 bug in relative import rewrites:
    http://bugs.python.org/issue19510.

    >>> list(verify_stdlib_on_own_line('import sys, foo'))
    ['mixed imports\\n stdlib: sys\\n relative: foo']
    >>> list(verify_stdlib_on_own_line('import sys, os'))
    []
    >>> list(verify_stdlib_on_own_line('import foo, bar'))
    []
    """
    for node in ast.walk(ast.parse(source)):
        if not isinstance(node, ast.Import):
            continue
        # Split the names of this one statement into the two camps.
        stdlib = [a.name for a in node.names if a.name in stdlib_modules]
        local = [a.name for a in node.names if a.name not in stdlib_modules]
        if not (stdlib and local):
            continue
        stdlib.sort()
        local.sort()
        yield ('mixed imports\n stdlib: %s\n relative: %s' %
               (', '.join(stdlib), ', '.join(local)))
161 161
class CircularImport(Exception):
    """Raised by check_one_mod() when it runs into an import cycle.

    The cycle, a list of module names, is passed as the only argument,
    so handlers read it from ``args[0]``.
    """
164 164
165 165
def cyclekey(names):
    """Return a key identifying a cycle regardless of where it starts.

    The names are sorted, so every rotation of the same cycle maps to
    the same tuple. Cycles are expected to list each module once, so
    no de-duplication is done.

    >>> cyclekey(['foo', 'bar'])
    ('bar', 'foo')
    """
    return tuple(sorted(names))
168 168
def check_one_mod(mod, imports, path=None, ignore=None):
    """Follow the imports reachable from mod and raise on the first
    cycle that is not already known.

    Args:
      mod: Dotted name of the module to start from.
      imports: Dict mapping a module name to the names it imports.
      path: Modules visited on the way to mod; used by the recursion.
      ignore: Container of cyclekey() values for cycles to skip.

    Raises:
      CircularImport: The argument is the cycle as a list of module
        names, each listed once (the closing repeat of the first
        module is not included).
    """
    if path is None:
        path = []
    if ignore is None:
        ignore = []
    # Build a new list so the caller's path is left untouched.
    path = path + [mod]
    for i in sorted(imports.get(mod, [])):
        # Anything that is neither stdlib nor already qualified with
        # 'mercurial.' is resolved relative to mod's package.
        if i not in stdlib_modules and not i.startswith('mercurial.'):
            i = mod.rsplit('.', 1)[0] + '.' + i
        if i in path:
            firstspot = path.index(i)
            cycle = path[firstspot:]
            if cyclekey(cycle) not in ignore:
                raise CircularImport(cycle)
            # Known cycle: do not descend into it again.
            continue
        check_one_mod(i, imports, path=path, ignore=ignore)
185 185
def rotatecycle(cycle):
    """Arrange a cycle so that the lexicographically first module is
    listed first, and close it by repeating that module at the end.

    Args:
      cycle: Non-empty list of module names, each listed once.

    Returns:
      A new list starting and ending with the lowest name.

    >>> rotatecycle(['foo', 'bar'])
    ['bar', 'foo', 'bar']
    """
    lowest = min(cycle)
    idx = cycle.index(lowest)
    # Rotate so lowest leads, then append it again to close the loop.
    return cycle[idx:] + cycle[:idx] + [lowest]
195 195
def find_cycles(imports):
    """Find cycles in an already-loaded import graph.

    Each module is checked in sorted order. A check stops at the first
    cycle it has not seen before, so one pass over a module records at
    most one new cycle. The result holds one ' -> ' joined string per
    distinct cycle.

    >>> imports = {'top.foo': ['bar', 'os.path', 'qux'],
    ...            'top.bar': ['baz', 'sys'],
    ...            'top.baz': ['foo'],
    ...            'top.qux': ['foo']}
    >>> print('\\n'.join(sorted(find_cycles(imports))))
    top.bar -> top.baz -> top.foo -> top.bar
    top.foo -> top.qux -> top.foo
    """
    found = {}
    for mod in sorted(imports):
        try:
            # Cycles recorded so far are passed along to be skipped.
            check_one_mod(mod, imports, ignore=found)
        except CircularImport as err:
            members = err.args[0]
            found[cyclekey(members)] = ' -> '.join(rotatecycle(members))
    return found.values()
215 215
def _cycle_sortkey(c):
    """Sort key for a rendered cycle: shorter strings first, ties broken
    by the string itself."""
    length = len(c)
    return (length, c)
218 218
def main(argv):
    """Check the given source files for mixed imports and import cycles.

    Every problem found is printed to stdout.

    Args:
      argv: Command line; argv[0] is the program name and the remaining
        items are paths of python source files.

    Returns:
      A value suitable for sys.exit(int(...)): 1 if no files were given,
      True if any problem was reported, False otherwise.
    """
    if len(argv) < 2:
        prog = argv[0] if argv else sys.argv[0]
        print('Usage: %s file [file] [file] ...' % prog)
        return 1
    used_imports = {}
    any_errors = False
    for source_path in argv[1:]:
        # Read everything first so the file is closed even if parsing
        # the source raises.
        with open(source_path) as f:
            src = f.read()
        modname = dotted_name_of_path(source_path, trimpure=True)
        used_imports[modname] = sorted(
            imported_modules(src, ignore_nested=True))
        for error in verify_stdlib_on_own_line(src):
            any_errors = True
            print('%s %s' % (source_path, error))
    cycles = find_cycles(used_imports)
    if cycles:
        firstmods = set()
        for c in sorted(cycles, key=_cycle_sortkey):
            first = c.split()[0]
            # As a rough cut, ignore any cycle that starts with the
            # same module as some other cycle. Otherwise we see lots
            # of cycles that are effectively duplicates.
            if first in firstmods:
                continue
            print('Import cycle: %s' % c)
            firstmods.add(first)
            any_errors = True
    # Non-zero (True) signals failure, matching the usage error above.
    return any_errors
249 249
if __name__ == '__main__':
    # main()'s return value, converted to int, becomes the exit status.
    sys.exit(int(main(sys.argv)))
General Comments 0
You need to be logged in to leave comments. Login now