##// END OF EJS Templates
import-checker: rotatecycle is actually the canonical cycle key...
Matt Mackall -
r24491:784b278b default
parent child Browse files
Show More
@@ -1,250 +1,246
1 import ast
1 import ast
2 import os
2 import os
3 import sys
3 import sys
4
4
5 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
5 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
6 # to work when run from a virtualenv. The modules were chosen empirically
6 # to work when run from a virtualenv. The modules were chosen empirically
7 # so that the return value matches the return value without virtualenv.
7 # so that the return value matches the return value without virtualenv.
8 import BaseHTTPServer
8 import BaseHTTPServer
9 import zlib
9 import zlib
10
10
11 def dotted_name_of_path(path, trimpure=False):
11 def dotted_name_of_path(path, trimpure=False):
12 """Given a relative path to a source file, return its dotted module name.
12 """Given a relative path to a source file, return its dotted module name.
13
13
14 >>> dotted_name_of_path('mercurial/error.py')
14 >>> dotted_name_of_path('mercurial/error.py')
15 'mercurial.error'
15 'mercurial.error'
16 >>> dotted_name_of_path('mercurial/pure/parsers.py', trimpure=True)
16 >>> dotted_name_of_path('mercurial/pure/parsers.py', trimpure=True)
17 'mercurial.parsers'
17 'mercurial.parsers'
18 >>> dotted_name_of_path('zlibmodule.so')
18 >>> dotted_name_of_path('zlibmodule.so')
19 'zlib'
19 'zlib'
20 """
20 """
21 parts = path.split('/')
21 parts = path.split('/')
22 parts[-1] = parts[-1].split('.', 1)[0] # remove .py and .so and .ARCH.so
22 parts[-1] = parts[-1].split('.', 1)[0] # remove .py and .so and .ARCH.so
23 if parts[-1].endswith('module'):
23 if parts[-1].endswith('module'):
24 parts[-1] = parts[-1][:-6]
24 parts[-1] = parts[-1][:-6]
25 if trimpure:
25 if trimpure:
26 return '.'.join(p for p in parts if p != 'pure')
26 return '.'.join(p for p in parts if p != 'pure')
27 return '.'.join(parts)
27 return '.'.join(parts)
28
28
29
29
30 def list_stdlib_modules():
30 def list_stdlib_modules():
31 """List the modules present in the stdlib.
31 """List the modules present in the stdlib.
32
32
33 >>> mods = set(list_stdlib_modules())
33 >>> mods = set(list_stdlib_modules())
34 >>> 'BaseHTTPServer' in mods
34 >>> 'BaseHTTPServer' in mods
35 True
35 True
36
36
37 os.path isn't really a module, so it's missing:
37 os.path isn't really a module, so it's missing:
38
38
39 >>> 'os.path' in mods
39 >>> 'os.path' in mods
40 False
40 False
41
41
42 sys requires special treatment, because it's baked into the
42 sys requires special treatment, because it's baked into the
43 interpreter, but it should still appear:
43 interpreter, but it should still appear:
44
44
45 >>> 'sys' in mods
45 >>> 'sys' in mods
46 True
46 True
47
47
48 >>> 'collections' in mods
48 >>> 'collections' in mods
49 True
49 True
50
50
51 >>> 'cStringIO' in mods
51 >>> 'cStringIO' in mods
52 True
52 True
53 """
53 """
54 for m in sys.builtin_module_names:
54 for m in sys.builtin_module_names:
55 yield m
55 yield m
56 # These modules only exist on windows, but we should always
56 # These modules only exist on windows, but we should always
57 # consider them stdlib.
57 # consider them stdlib.
58 for m in ['msvcrt', '_winreg']:
58 for m in ['msvcrt', '_winreg']:
59 yield m
59 yield m
60 # These get missed too
60 # These get missed too
61 for m in 'ctypes', 'email':
61 for m in 'ctypes', 'email':
62 yield m
62 yield m
63 yield 'builtins' # python3 only
63 yield 'builtins' # python3 only
64 stdlib_prefixes = set([sys.prefix, sys.exec_prefix])
64 stdlib_prefixes = set([sys.prefix, sys.exec_prefix])
65 # We need to supplement the list of prefixes for the search to work
65 # We need to supplement the list of prefixes for the search to work
66 # when run from within a virtualenv.
66 # when run from within a virtualenv.
67 for mod in (BaseHTTPServer, zlib):
67 for mod in (BaseHTTPServer, zlib):
68 try:
68 try:
69 # Not all module objects have a __file__ attribute.
69 # Not all module objects have a __file__ attribute.
70 filename = mod.__file__
70 filename = mod.__file__
71 except AttributeError:
71 except AttributeError:
72 continue
72 continue
73 dirname = os.path.dirname(filename)
73 dirname = os.path.dirname(filename)
74 for prefix in stdlib_prefixes:
74 for prefix in stdlib_prefixes:
75 if dirname.startswith(prefix):
75 if dirname.startswith(prefix):
76 # Then this directory is redundant.
76 # Then this directory is redundant.
77 break
77 break
78 else:
78 else:
79 stdlib_prefixes.add(dirname)
79 stdlib_prefixes.add(dirname)
80 for libpath in sys.path:
80 for libpath in sys.path:
81 # We want to walk everything in sys.path that starts with
81 # We want to walk everything in sys.path that starts with
82 # something in stdlib_prefixes. check-code suppressed because
82 # something in stdlib_prefixes. check-code suppressed because
83 # the ast module used by this script implies the availability
83 # the ast module used by this script implies the availability
84 # of any().
84 # of any().
85 if not any(libpath.startswith(p) for p in stdlib_prefixes): # no-py24
85 if not any(libpath.startswith(p) for p in stdlib_prefixes): # no-py24
86 continue
86 continue
87 if 'site-packages' in libpath:
87 if 'site-packages' in libpath:
88 continue
88 continue
89 for top, dirs, files in os.walk(libpath):
89 for top, dirs, files in os.walk(libpath):
90 for name in files:
90 for name in files:
91 if name == '__init__.py':
91 if name == '__init__.py':
92 continue
92 continue
93 if not (name.endswith('.py') or name.endswith('.so')):
93 if not (name.endswith('.py') or name.endswith('.so')):
94 continue
94 continue
95 full_path = os.path.join(top, name)
95 full_path = os.path.join(top, name)
96 if 'site-packages' in full_path:
96 if 'site-packages' in full_path:
97 continue
97 continue
98 rel_path = full_path[len(libpath) + 1:]
98 rel_path = full_path[len(libpath) + 1:]
99 mod = dotted_name_of_path(rel_path)
99 mod = dotted_name_of_path(rel_path)
100 yield mod
100 yield mod
101
101
102 stdlib_modules = set(list_stdlib_modules())
102 stdlib_modules = set(list_stdlib_modules())
103
103
104 def imported_modules(source, ignore_nested=False):
104 def imported_modules(source, ignore_nested=False):
105 """Given the source of a file as a string, yield the names
105 """Given the source of a file as a string, yield the names
106 imported by that file.
106 imported by that file.
107
107
108 Args:
108 Args:
109 source: The python source to examine as a string.
109 source: The python source to examine as a string.
110 ignore_nested: If true, import statements that do not start in
110 ignore_nested: If true, import statements that do not start in
111 column zero will be ignored.
111 column zero will be ignored.
112
112
113 Returns:
113 Returns:
114 A list of module names imported by the given source.
114 A list of module names imported by the given source.
115
115
116 >>> sorted(imported_modules(
116 >>> sorted(imported_modules(
117 ... 'import foo ; from baz import bar; import foo.qux'))
117 ... 'import foo ; from baz import bar; import foo.qux'))
118 ['baz.bar', 'foo', 'foo.qux']
118 ['baz.bar', 'foo', 'foo.qux']
119 >>> sorted(imported_modules(
119 >>> sorted(imported_modules(
120 ... '''import foo
120 ... '''import foo
121 ... def wat():
121 ... def wat():
122 ... import bar
122 ... import bar
123 ... ''', ignore_nested=True))
123 ... ''', ignore_nested=True))
124 ['foo']
124 ['foo']
125 """
125 """
126 for node in ast.walk(ast.parse(source)):
126 for node in ast.walk(ast.parse(source)):
127 if ignore_nested and getattr(node, 'col_offset', 0) > 0:
127 if ignore_nested and getattr(node, 'col_offset', 0) > 0:
128 continue
128 continue
129 if isinstance(node, ast.Import):
129 if isinstance(node, ast.Import):
130 for n in node.names:
130 for n in node.names:
131 yield n.name
131 yield n.name
132 elif isinstance(node, ast.ImportFrom):
132 elif isinstance(node, ast.ImportFrom):
133 prefix = node.module + '.'
133 prefix = node.module + '.'
134 for n in node.names:
134 for n in node.names:
135 yield prefix + n.name
135 yield prefix + n.name
136
136
137 def verify_stdlib_on_own_line(source):
137 def verify_stdlib_on_own_line(source):
138 """Given some python source, verify that stdlib imports are done
138 """Given some python source, verify that stdlib imports are done
139 in separate statements from relative local module imports.
139 in separate statements from relative local module imports.
140
140
141 Observing this limitation is important as it works around an
141 Observing this limitation is important as it works around an
142 annoying lib2to3 bug in relative import rewrites:
142 annoying lib2to3 bug in relative import rewrites:
143 http://bugs.python.org/issue19510.
143 http://bugs.python.org/issue19510.
144
144
145 >>> list(verify_stdlib_on_own_line('import sys, foo'))
145 >>> list(verify_stdlib_on_own_line('import sys, foo'))
146 ['mixed imports\\n stdlib: sys\\n relative: foo']
146 ['mixed imports\\n stdlib: sys\\n relative: foo']
147 >>> list(verify_stdlib_on_own_line('import sys, os'))
147 >>> list(verify_stdlib_on_own_line('import sys, os'))
148 []
148 []
149 >>> list(verify_stdlib_on_own_line('import foo, bar'))
149 >>> list(verify_stdlib_on_own_line('import foo, bar'))
150 []
150 []
151 """
151 """
152 for node in ast.walk(ast.parse(source)):
152 for node in ast.walk(ast.parse(source)):
153 if isinstance(node, ast.Import):
153 if isinstance(node, ast.Import):
154 from_stdlib = {False: [], True: []}
154 from_stdlib = {False: [], True: []}
155 for n in node.names:
155 for n in node.names:
156 from_stdlib[n.name in stdlib_modules].append(n.name)
156 from_stdlib[n.name in stdlib_modules].append(n.name)
157 if from_stdlib[True] and from_stdlib[False]:
157 if from_stdlib[True] and from_stdlib[False]:
158 yield ('mixed imports\n stdlib: %s\n relative: %s' %
158 yield ('mixed imports\n stdlib: %s\n relative: %s' %
159 (', '.join(sorted(from_stdlib[True])),
159 (', '.join(sorted(from_stdlib[True])),
160 ', '.join(sorted(from_stdlib[False]))))
160 ', '.join(sorted(from_stdlib[False]))))
161
161
162 class CircularImport(Exception):
162 class CircularImport(Exception):
163 pass
163 pass
164
164
165
166 def cyclekey(names):
167 return tuple(sorted(names))
168
169 def checkmod(mod, imports):
165 def checkmod(mod, imports):
170 shortest = {}
166 shortest = {}
171 visit = [[mod]]
167 visit = [[mod]]
172 while visit:
168 while visit:
173 path = visit.pop(0)
169 path = visit.pop(0)
174 for i in sorted(imports.get(path[-1], [])):
170 for i in sorted(imports.get(path[-1], [])):
175 if i not in stdlib_modules and not i.startswith('mercurial.'):
171 if i not in stdlib_modules and not i.startswith('mercurial.'):
176 i = mod.rsplit('.', 1)[0] + '.' + i
172 i = mod.rsplit('.', 1)[0] + '.' + i
177 if len(path) < shortest.get(i, 1000):
173 if len(path) < shortest.get(i, 1000):
178 shortest[i] = len(path)
174 shortest[i] = len(path)
179 if i in path:
175 if i in path:
180 if i == path[0]:
176 if i == path[0]:
181 raise CircularImport(path)
177 raise CircularImport(path)
182 continue
178 continue
183 visit.append(path + [i])
179 visit.append(path + [i])
184
180
185 def rotatecycle(cycle):
181 def rotatecycle(cycle):
186 """arrange a cycle so that the lexicographically first module listed first
182 """arrange a cycle so that the lexicographically first module listed first
187
183
188 >>> rotatecycle(['foo', 'bar'])
184 >>> rotatecycle(['foo', 'bar'])
189 ['bar', 'foo', 'bar']
185 ['bar', 'foo', 'bar']
190 """
186 """
191 lowest = min(cycle)
187 lowest = min(cycle)
192 idx = cycle.index(lowest)
188 idx = cycle.index(lowest)
193 return cycle[idx:] + cycle[:idx] + [lowest]
189 return cycle[idx:] + cycle[:idx] + [lowest]
194
190
195 def find_cycles(imports):
191 def find_cycles(imports):
196 """Find cycles in an already-loaded import graph.
192 """Find cycles in an already-loaded import graph.
197
193
198 >>> imports = {'top.foo': ['bar', 'os.path', 'qux'],
194 >>> imports = {'top.foo': ['bar', 'os.path', 'qux'],
199 ... 'top.bar': ['baz', 'sys'],
195 ... 'top.bar': ['baz', 'sys'],
200 ... 'top.baz': ['foo'],
196 ... 'top.baz': ['foo'],
201 ... 'top.qux': ['foo']}
197 ... 'top.qux': ['foo']}
202 >>> print '\\n'.join(sorted(find_cycles(imports)))
198 >>> print '\\n'.join(sorted(find_cycles(imports)))
203 top.bar -> top.baz -> top.foo -> top.bar
199 top.bar -> top.baz -> top.foo -> top.bar
204 top.foo -> top.qux -> top.foo
200 top.foo -> top.qux -> top.foo
205 """
201 """
206 cycles = {}
202 cycles = set()
207 for mod in sorted(imports.iterkeys()):
203 for mod in sorted(imports.iterkeys()):
208 try:
204 try:
209 checkmod(mod, imports)
205 checkmod(mod, imports)
210 except CircularImport, e:
206 except CircularImport, e:
211 cycle = e.args[0]
207 cycle = e.args[0]
212 cycles[cyclekey(cycle)] = ' -> '.join(rotatecycle(cycle))
208 cycles.add(" -> ".join(rotatecycle(cycle)))
213 return cycles.values()
209 return cycles
214
210
215 def _cycle_sortkey(c):
211 def _cycle_sortkey(c):
216 return len(c), c
212 return len(c), c
217
213
218 def main(argv):
214 def main(argv):
219 if len(argv) < 2:
215 if len(argv) < 2:
220 print 'Usage: %s file [file] [file] ...'
216 print 'Usage: %s file [file] [file] ...'
221 return 1
217 return 1
222 used_imports = {}
218 used_imports = {}
223 any_errors = False
219 any_errors = False
224 for source_path in argv[1:]:
220 for source_path in argv[1:]:
225 f = open(source_path)
221 f = open(source_path)
226 modname = dotted_name_of_path(source_path, trimpure=True)
222 modname = dotted_name_of_path(source_path, trimpure=True)
227 src = f.read()
223 src = f.read()
228 used_imports[modname] = sorted(
224 used_imports[modname] = sorted(
229 imported_modules(src, ignore_nested=True))
225 imported_modules(src, ignore_nested=True))
230 for error in verify_stdlib_on_own_line(src):
226 for error in verify_stdlib_on_own_line(src):
231 any_errors = True
227 any_errors = True
232 print source_path, error
228 print source_path, error
233 f.close()
229 f.close()
234 cycles = find_cycles(used_imports)
230 cycles = find_cycles(used_imports)
235 if cycles:
231 if cycles:
236 firstmods = set()
232 firstmods = set()
237 for c in sorted(cycles, key=_cycle_sortkey):
233 for c in sorted(cycles, key=_cycle_sortkey):
238 first = c.split()[0]
234 first = c.split()[0]
239 # As a rough cut, ignore any cycle that starts with the
235 # As a rough cut, ignore any cycle that starts with the
240 # same module as some other cycle. Otherwise we see lots
236 # same module as some other cycle. Otherwise we see lots
241 # of cycles that are effectively duplicates.
237 # of cycles that are effectively duplicates.
242 if first in firstmods:
238 if first in firstmods:
243 continue
239 continue
244 print 'Import cycle:', c
240 print 'Import cycle:', c
245 firstmods.add(first)
241 firstmods.add(first)
246 any_errors = True
242 any_errors = True
247 return not any_errors
243 return not any_errors
248
244
249 if __name__ == '__main__':
245 if __name__ == '__main__':
250 sys.exit(int(main(sys.argv)))
246 sys.exit(int(main(sys.argv)))
General Comments 0
You need to be logged in to leave comments. Login now