##// END OF EJS Templates
import-checker: drop duplicate element from cycle...
Matt Mackall -
r24488:4b3fc460 default
parent child Browse files
Show More
@@ -1,251 +1,251
1 import ast
1 import ast
2 import os
2 import os
3 import sys
3 import sys
4
4
5 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
5 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
6 # to work when run from a virtualenv. The modules were chosen empirically
6 # to work when run from a virtualenv. The modules were chosen empirically
7 # so that the return value matches the return value without virtualenv.
7 # so that the return value matches the return value without virtualenv.
8 import BaseHTTPServer
8 import BaseHTTPServer
9 import zlib
9 import zlib
10
10
11 def dotted_name_of_path(path, trimpure=False):
11 def dotted_name_of_path(path, trimpure=False):
12 """Given a relative path to a source file, return its dotted module name.
12 """Given a relative path to a source file, return its dotted module name.
13
13
14 >>> dotted_name_of_path('mercurial/error.py')
14 >>> dotted_name_of_path('mercurial/error.py')
15 'mercurial.error'
15 'mercurial.error'
16 >>> dotted_name_of_path('mercurial/pure/parsers.py', trimpure=True)
16 >>> dotted_name_of_path('mercurial/pure/parsers.py', trimpure=True)
17 'mercurial.parsers'
17 'mercurial.parsers'
18 >>> dotted_name_of_path('zlibmodule.so')
18 >>> dotted_name_of_path('zlibmodule.so')
19 'zlib'
19 'zlib'
20 """
20 """
21 parts = path.split('/')
21 parts = path.split('/')
22 parts[-1] = parts[-1].split('.', 1)[0] # remove .py and .so and .ARCH.so
22 parts[-1] = parts[-1].split('.', 1)[0] # remove .py and .so and .ARCH.so
23 if parts[-1].endswith('module'):
23 if parts[-1].endswith('module'):
24 parts[-1] = parts[-1][:-6]
24 parts[-1] = parts[-1][:-6]
25 if trimpure:
25 if trimpure:
26 return '.'.join(p for p in parts if p != 'pure')
26 return '.'.join(p for p in parts if p != 'pure')
27 return '.'.join(parts)
27 return '.'.join(parts)
28
28
29
29
30 def list_stdlib_modules():
30 def list_stdlib_modules():
31 """List the modules present in the stdlib.
31 """List the modules present in the stdlib.
32
32
33 >>> mods = set(list_stdlib_modules())
33 >>> mods = set(list_stdlib_modules())
34 >>> 'BaseHTTPServer' in mods
34 >>> 'BaseHTTPServer' in mods
35 True
35 True
36
36
37 os.path isn't really a module, so it's missing:
37 os.path isn't really a module, so it's missing:
38
38
39 >>> 'os.path' in mods
39 >>> 'os.path' in mods
40 False
40 False
41
41
42 sys requires special treatment, because it's baked into the
42 sys requires special treatment, because it's baked into the
43 interpreter, but it should still appear:
43 interpreter, but it should still appear:
44
44
45 >>> 'sys' in mods
45 >>> 'sys' in mods
46 True
46 True
47
47
48 >>> 'collections' in mods
48 >>> 'collections' in mods
49 True
49 True
50
50
51 >>> 'cStringIO' in mods
51 >>> 'cStringIO' in mods
52 True
52 True
53 """
53 """
54 for m in sys.builtin_module_names:
54 for m in sys.builtin_module_names:
55 yield m
55 yield m
56 # These modules only exist on windows, but we should always
56 # These modules only exist on windows, but we should always
57 # consider them stdlib.
57 # consider them stdlib.
58 for m in ['msvcrt', '_winreg']:
58 for m in ['msvcrt', '_winreg']:
59 yield m
59 yield m
60 # These get missed too
60 # These get missed too
61 for m in 'ctypes', 'email':
61 for m in 'ctypes', 'email':
62 yield m
62 yield m
63 yield 'builtins' # python3 only
63 yield 'builtins' # python3 only
64 stdlib_prefixes = set([sys.prefix, sys.exec_prefix])
64 stdlib_prefixes = set([sys.prefix, sys.exec_prefix])
65 # We need to supplement the list of prefixes for the search to work
65 # We need to supplement the list of prefixes for the search to work
66 # when run from within a virtualenv.
66 # when run from within a virtualenv.
67 for mod in (BaseHTTPServer, zlib):
67 for mod in (BaseHTTPServer, zlib):
68 try:
68 try:
69 # Not all module objects have a __file__ attribute.
69 # Not all module objects have a __file__ attribute.
70 filename = mod.__file__
70 filename = mod.__file__
71 except AttributeError:
71 except AttributeError:
72 continue
72 continue
73 dirname = os.path.dirname(filename)
73 dirname = os.path.dirname(filename)
74 for prefix in stdlib_prefixes:
74 for prefix in stdlib_prefixes:
75 if dirname.startswith(prefix):
75 if dirname.startswith(prefix):
76 # Then this directory is redundant.
76 # Then this directory is redundant.
77 break
77 break
78 else:
78 else:
79 stdlib_prefixes.add(dirname)
79 stdlib_prefixes.add(dirname)
80 for libpath in sys.path:
80 for libpath in sys.path:
81 # We want to walk everything in sys.path that starts with
81 # We want to walk everything in sys.path that starts with
82 # something in stdlib_prefixes. check-code suppressed because
82 # something in stdlib_prefixes. check-code suppressed because
83 # the ast module used by this script implies the availability
83 # the ast module used by this script implies the availability
84 # of any().
84 # of any().
85 if not any(libpath.startswith(p) for p in stdlib_prefixes): # no-py24
85 if not any(libpath.startswith(p) for p in stdlib_prefixes): # no-py24
86 continue
86 continue
87 if 'site-packages' in libpath:
87 if 'site-packages' in libpath:
88 continue
88 continue
89 for top, dirs, files in os.walk(libpath):
89 for top, dirs, files in os.walk(libpath):
90 for name in files:
90 for name in files:
91 if name == '__init__.py':
91 if name == '__init__.py':
92 continue
92 continue
93 if not (name.endswith('.py') or name.endswith('.so')):
93 if not (name.endswith('.py') or name.endswith('.so')):
94 continue
94 continue
95 full_path = os.path.join(top, name)
95 full_path = os.path.join(top, name)
96 if 'site-packages' in full_path:
96 if 'site-packages' in full_path:
97 continue
97 continue
98 rel_path = full_path[len(libpath) + 1:]
98 rel_path = full_path[len(libpath) + 1:]
99 mod = dotted_name_of_path(rel_path)
99 mod = dotted_name_of_path(rel_path)
100 yield mod
100 yield mod
101
101
102 stdlib_modules = set(list_stdlib_modules())
102 stdlib_modules = set(list_stdlib_modules())
103
103
104 def imported_modules(source, ignore_nested=False):
104 def imported_modules(source, ignore_nested=False):
105 """Given the source of a file as a string, yield the names
105 """Given the source of a file as a string, yield the names
106 imported by that file.
106 imported by that file.
107
107
108 Args:
108 Args:
109 source: The python source to examine as a string.
109 source: The python source to examine as a string.
110 ignore_nested: If true, import statements that do not start in
110 ignore_nested: If true, import statements that do not start in
111 column zero will be ignored.
111 column zero will be ignored.
112
112
113 Returns:
113 Returns:
114 A list of module names imported by the given source.
114 A list of module names imported by the given source.
115
115
116 >>> sorted(imported_modules(
116 >>> sorted(imported_modules(
117 ... 'import foo ; from baz import bar; import foo.qux'))
117 ... 'import foo ; from baz import bar; import foo.qux'))
118 ['baz.bar', 'foo', 'foo.qux']
118 ['baz.bar', 'foo', 'foo.qux']
119 >>> sorted(imported_modules(
119 >>> sorted(imported_modules(
120 ... '''import foo
120 ... '''import foo
121 ... def wat():
121 ... def wat():
122 ... import bar
122 ... import bar
123 ... ''', ignore_nested=True))
123 ... ''', ignore_nested=True))
124 ['foo']
124 ['foo']
125 """
125 """
126 for node in ast.walk(ast.parse(source)):
126 for node in ast.walk(ast.parse(source)):
127 if ignore_nested and getattr(node, 'col_offset', 0) > 0:
127 if ignore_nested and getattr(node, 'col_offset', 0) > 0:
128 continue
128 continue
129 if isinstance(node, ast.Import):
129 if isinstance(node, ast.Import):
130 for n in node.names:
130 for n in node.names:
131 yield n.name
131 yield n.name
132 elif isinstance(node, ast.ImportFrom):
132 elif isinstance(node, ast.ImportFrom):
133 prefix = node.module + '.'
133 prefix = node.module + '.'
134 for n in node.names:
134 for n in node.names:
135 yield prefix + n.name
135 yield prefix + n.name
136
136
137 def verify_stdlib_on_own_line(source):
137 def verify_stdlib_on_own_line(source):
138 """Given some python source, verify that stdlib imports are done
138 """Given some python source, verify that stdlib imports are done
139 in separate statements from relative local module imports.
139 in separate statements from relative local module imports.
140
140
141 Observing this limitation is important as it works around an
141 Observing this limitation is important as it works around an
142 annoying lib2to3 bug in relative import rewrites:
142 annoying lib2to3 bug in relative import rewrites:
143 http://bugs.python.org/issue19510.
143 http://bugs.python.org/issue19510.
144
144
145 >>> list(verify_stdlib_on_own_line('import sys, foo'))
145 >>> list(verify_stdlib_on_own_line('import sys, foo'))
146 ['mixed imports\\n stdlib: sys\\n relative: foo']
146 ['mixed imports\\n stdlib: sys\\n relative: foo']
147 >>> list(verify_stdlib_on_own_line('import sys, os'))
147 >>> list(verify_stdlib_on_own_line('import sys, os'))
148 []
148 []
149 >>> list(verify_stdlib_on_own_line('import foo, bar'))
149 >>> list(verify_stdlib_on_own_line('import foo, bar'))
150 []
150 []
151 """
151 """
152 for node in ast.walk(ast.parse(source)):
152 for node in ast.walk(ast.parse(source)):
153 if isinstance(node, ast.Import):
153 if isinstance(node, ast.Import):
154 from_stdlib = {False: [], True: []}
154 from_stdlib = {False: [], True: []}
155 for n in node.names:
155 for n in node.names:
156 from_stdlib[n.name in stdlib_modules].append(n.name)
156 from_stdlib[n.name in stdlib_modules].append(n.name)
157 if from_stdlib[True] and from_stdlib[False]:
157 if from_stdlib[True] and from_stdlib[False]:
158 yield ('mixed imports\n stdlib: %s\n relative: %s' %
158 yield ('mixed imports\n stdlib: %s\n relative: %s' %
159 (', '.join(sorted(from_stdlib[True])),
159 (', '.join(sorted(from_stdlib[True])),
160 ', '.join(sorted(from_stdlib[False]))))
160 ', '.join(sorted(from_stdlib[False]))))
161
161
162 class CircularImport(Exception):
162 class CircularImport(Exception):
163 pass
163 pass
164
164
165
165
166 def cyclekey(names):
166 def cyclekey(names):
167 return tuple(sorted(set(names)))
167 return tuple(sorted((names)))
168
168
169 def check_one_mod(mod, imports, path=None, ignore=None):
169 def check_one_mod(mod, imports, path=None, ignore=None):
170 if path is None:
170 if path is None:
171 path = []
171 path = []
172 if ignore is None:
172 if ignore is None:
173 ignore = []
173 ignore = []
174 path = path + [mod]
174 path = path + [mod]
175 for i in sorted(imports.get(mod, [])):
175 for i in sorted(imports.get(mod, [])):
176 if i not in stdlib_modules and not i.startswith('mercurial.'):
176 if i not in stdlib_modules and not i.startswith('mercurial.'):
177 i = mod.rsplit('.', 1)[0] + '.' + i
177 i = mod.rsplit('.', 1)[0] + '.' + i
178 if i in path:
178 if i in path:
179 firstspot = path.index(i)
179 firstspot = path.index(i)
180 cycle = path[firstspot:] + [i]
180 cycle = path[firstspot:]
181 if cyclekey(cycle) not in ignore:
181 if cyclekey(cycle) not in ignore:
182 raise CircularImport(cycle)
182 raise CircularImport(cycle)
183 continue
183 continue
184 check_one_mod(i, imports, path=path, ignore=ignore)
184 check_one_mod(i, imports, path=path, ignore=ignore)
185
185
186 def rotatecycle(cycle):
186 def rotatecycle(cycle):
187 """arrange a cycle so that the lexicographically first module listed first
187 """arrange a cycle so that the lexicographically first module listed first
188
188
189 >>> rotatecycle(['foo', 'bar', 'foo'])
189 >>> rotatecycle(['foo', 'bar'])
190 ['bar', 'foo', 'bar']
190 ['bar', 'foo', 'bar']
191 """
191 """
192 lowest = min(cycle)
192 lowest = min(cycle)
193 idx = cycle.index(lowest)
193 idx = cycle.index(lowest)
194 return cycle[idx:-1] + cycle[:idx] + [lowest]
194 return cycle[idx:] + cycle[:idx] + [lowest]
195
195
196 def find_cycles(imports):
196 def find_cycles(imports):
197 """Find cycles in an already-loaded import graph.
197 """Find cycles in an already-loaded import graph.
198
198
199 >>> imports = {'top.foo': ['bar', 'os.path', 'qux'],
199 >>> imports = {'top.foo': ['bar', 'os.path', 'qux'],
200 ... 'top.bar': ['baz', 'sys'],
200 ... 'top.bar': ['baz', 'sys'],
201 ... 'top.baz': ['foo'],
201 ... 'top.baz': ['foo'],
202 ... 'top.qux': ['foo']}
202 ... 'top.qux': ['foo']}
203 >>> print '\\n'.join(sorted(find_cycles(imports)))
203 >>> print '\\n'.join(sorted(find_cycles(imports)))
204 top.bar -> top.baz -> top.foo -> top.bar
204 top.bar -> top.baz -> top.foo -> top.bar
205 top.foo -> top.qux -> top.foo
205 top.foo -> top.qux -> top.foo
206 """
206 """
207 cycles = {}
207 cycles = {}
208 for mod in sorted(imports.iterkeys()):
208 for mod in sorted(imports.iterkeys()):
209 try:
209 try:
210 check_one_mod(mod, imports, ignore=cycles)
210 check_one_mod(mod, imports, ignore=cycles)
211 except CircularImport, e:
211 except CircularImport, e:
212 cycle = e.args[0]
212 cycle = e.args[0]
213 cycles[cyclekey(cycle)] = ' -> '.join(rotatecycle(cycle))
213 cycles[cyclekey(cycle)] = ' -> '.join(rotatecycle(cycle))
214 return cycles.values()
214 return cycles.values()
215
215
216 def _cycle_sortkey(c):
216 def _cycle_sortkey(c):
217 return len(c), c
217 return len(c), c
218
218
219 def main(argv):
219 def main(argv):
220 if len(argv) < 2:
220 if len(argv) < 2:
221 print 'Usage: %s file [file] [file] ...'
221 print 'Usage: %s file [file] [file] ...'
222 return 1
222 return 1
223 used_imports = {}
223 used_imports = {}
224 any_errors = False
224 any_errors = False
225 for source_path in argv[1:]:
225 for source_path in argv[1:]:
226 f = open(source_path)
226 f = open(source_path)
227 modname = dotted_name_of_path(source_path, trimpure=True)
227 modname = dotted_name_of_path(source_path, trimpure=True)
228 src = f.read()
228 src = f.read()
229 used_imports[modname] = sorted(
229 used_imports[modname] = sorted(
230 imported_modules(src, ignore_nested=True))
230 imported_modules(src, ignore_nested=True))
231 for error in verify_stdlib_on_own_line(src):
231 for error in verify_stdlib_on_own_line(src):
232 any_errors = True
232 any_errors = True
233 print source_path, error
233 print source_path, error
234 f.close()
234 f.close()
235 cycles = find_cycles(used_imports)
235 cycles = find_cycles(used_imports)
236 if cycles:
236 if cycles:
237 firstmods = set()
237 firstmods = set()
238 for c in sorted(cycles, key=_cycle_sortkey):
238 for c in sorted(cycles, key=_cycle_sortkey):
239 first = c.split()[0]
239 first = c.split()[0]
240 # As a rough cut, ignore any cycle that starts with the
240 # As a rough cut, ignore any cycle that starts with the
241 # same module as some other cycle. Otherwise we see lots
241 # same module as some other cycle. Otherwise we see lots
242 # of cycles that are effectively duplicates.
242 # of cycles that are effectively duplicates.
243 if first in firstmods:
243 if first in firstmods:
244 continue
244 continue
245 print 'Import cycle:', c
245 print 'Import cycle:', c
246 firstmods.add(first)
246 firstmods.add(first)
247 any_errors = True
247 any_errors = True
248 return not any_errors
248 return not any_errors
249
249
250 if __name__ == '__main__':
250 if __name__ == '__main__':
251 sys.exit(int(main(sys.argv)))
251 sys.exit(int(main(sys.argv)))
General Comments 0
You need to be logged in to leave comments. Login now