##// END OF EJS Templates
import-checker: allow *.pyd based stdlib modules...
Matt Harbison -
r24668:81873bb2 default
parent child Browse files
Show More
@@ -1,246 +1,247 b''
1 import ast
1 import ast
2 import os
2 import os
3 import sys
3 import sys
4
4
5 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
5 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
6 # to work when run from a virtualenv. The modules were chosen empirically
6 # to work when run from a virtualenv. The modules were chosen empirically
7 # so that the return value matches the return value without virtualenv.
7 # so that the return value matches the return value without virtualenv.
8 import BaseHTTPServer
8 import BaseHTTPServer
9 import zlib
9 import zlib
10
10
def dotted_name_of_path(path, trimpure=False):
    """Given a relative path to a source file, return its dotted module name.

    Both '/' and the platform's native separator (``os.sep``) are accepted
    as directory separators, so paths produced by ``os.path.join()`` or
    ``os.walk()`` are handled on Windows too.

    Args:
      path: Relative path to a module file (e.g. ``.py``, ``.so``, ``.pyd``).
      trimpure: If true, every path component named 'pure' is dropped
                from the result.

    Returns:
      The dotted module name as a string.

    >>> dotted_name_of_path('mercurial/error.py')
    'mercurial.error'
    >>> dotted_name_of_path('mercurial/pure/parsers.py', trimpure=True)
    'mercurial.parsers'
    >>> dotted_name_of_path('zlibmodule.so')
    'zlib'
    """
    # Normalise native separators first; splitting on '/' alone would leave
    # a Windows path as one single component.
    parts = path.replace(os.sep, '/').split('/')
    # Keep only what precedes the first dot: removes .py, .so, .pyd and
    # multi-part suffixes such as .ARCH.so
    parts[-1] = parts[-1].split('.', 1)[0]
    if parts[-1].endswith('module'):
        # e.g. 'zlibmodule' -> 'zlib'
        parts[-1] = parts[-1][:-6]
    if trimpure:
        return '.'.join(p for p in parts if p != 'pure')
    return '.'.join(parts)
28
28
29
29
def list_stdlib_modules():
    """Yield the dotted names of the modules considered part of the stdlib.

    Names come from three sources, in this order: the modules compiled
    into the interpreter, a short hard-coded list, and a walk over every
    ``sys.path`` entry that lives under a stdlib prefix.

    >>> mods = set(list_stdlib_modules())
    >>> 'BaseHTTPServer' in mods
    True

    os.path isn't really a module, so it's missing:

    >>> 'os.path' in mods
    False

    sys requires special treatment, because it's baked into the
    interpreter, but it should still appear:

    >>> 'sys' in mods
    True

    >>> 'collections' in mods
    True

    >>> 'cStringIO' in mods
    True
    """
    for name in sys.builtin_module_names:
        yield name
    # 'msvcrt' and '_winreg' only exist on windows, but are always treated
    # as stdlib.  'ctypes' and 'email' get missed by the walk below (it
    # skips __init__.py).  'builtins' is python3 only.
    for name in ('msvcrt', '_winreg', 'ctypes', 'email', 'builtins'):
        yield name

    stdlib_prefixes = set([sys.prefix, sys.exec_prefix])
    # Inside a virtualenv the two prefixes above are not enough, so add
    # the directories that actually hold a couple of known stdlib modules.
    for known in (BaseHTTPServer, zlib):
        try:
            # Not all module objects have a __file__ attribute.
            filename = known.__file__
        except AttributeError:
            continue
        dirname = os.path.dirname(filename)
        already_covered = False
        for prefix in stdlib_prefixes:
            if dirname.startswith(prefix):
                # Then this directory is redundant.
                already_covered = True
                break
        if not already_covered:
            stdlib_prefixes.add(dirname)

    module_suffixes = ('.py', '.so', '.pyd')
    for libpath in sys.path:
        if 'site-packages' in libpath:
            continue
        # Only walk sys.path entries that start with something in
        # stdlib_prefixes. check-code suppressed because the ast module
        # used by this script implies the availability of any().
        if not any(libpath.startswith(p) for p in stdlib_prefixes): # no-py24
            continue
        for top, dirs, files in os.walk(libpath):
            for name in files:
                if name == '__init__.py' or not name.endswith(module_suffixes):
                    continue
                full_path = os.path.join(top, name)
                if 'site-packages' in full_path:
                    continue
                # Strip "<libpath><separator>" to get the relative path.
                yield dotted_name_of_path(full_path[len(libpath) + 1:])
101
102
# Built once at import time; membership in this set is what the checks
# below use to decide whether an imported name belongs to the stdlib.
stdlib_modules = set(list_stdlib_modules())
103
104
def imported_modules(source, ignore_nested=False):
    """Given the source of a file as a string, yield the names
    imported by that file.

    Args:
      source: The python source to examine as a string.
      ignore_nested: If true, import statements that do not start in
                     column zero will be ignored.

    Yields:
      One name per imported item: ``import a.b`` gives 'a.b',
      ``from a import b`` gives 'a.b', and ``from . import b`` (which has
      no module part) gives plain 'b'.

    >>> sorted(imported_modules(
    ...         'import foo ; from baz import bar; import foo.qux'))
    ['baz.bar', 'foo', 'foo.qux']
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', ignore_nested=True))
    ['foo']
    >>> sorted(imported_modules('from . import foo'))
    ['foo']
    """
    for node in ast.walk(ast.parse(source)):
        if ignore_nested and getattr(node, 'col_offset', 0) > 0:
            continue
        if isinstance(node, ast.Import):
            for n in node.names:
                yield n.name
        elif isinstance(node, ast.ImportFrom):
            # node.module is None for "from . import x"; concatenating it
            # with '.' would raise TypeError, so use an empty prefix.
            prefix = (node.module + '.') if node.module else ''
            for n in node.names:
                yield prefix + n.name
136
137
def verify_stdlib_on_own_line(source):
    """Given some python source, verify that stdlib imports are done
    in separate statements from relative local module imports.

    Observing this limitation is important as it works around an
    annoying lib2to3 bug in relative import rewrites:
    http://bugs.python.org/issue19510.

    One message is yielded for every ``import`` statement that names both
    a stdlib module and a non-stdlib module.

    >>> list(verify_stdlib_on_own_line('import sys, foo'))
    ['mixed imports\\n stdlib: sys\\n relative: foo']
    >>> list(verify_stdlib_on_own_line('import sys, os'))
    []
    >>> list(verify_stdlib_on_own_line('import foo, bar'))
    []
    """
    for node in ast.walk(ast.parse(source)):
        if not isinstance(node, ast.Import):
            continue
        stdlib_names = []
        local_names = []
        for alias in node.names:
            if alias.name in stdlib_modules:
                stdlib_names.append(alias.name)
            else:
                local_names.append(alias.name)
        if stdlib_names and local_names:
            stdlib_names.sort()
            local_names.sort()
            yield ('mixed imports\n stdlib: %s\n relative: %s' %
                   (', '.join(stdlib_names), ', '.join(local_names)))
161
162
class CircularImport(Exception):
    """Signals that a chain of imports leads back to the module it began at.

    The first argument is the list of module names forming that chain.
    """
164
165
def checkmod(mod, imports):
    """Search the import graph breadth-first for a cycle through mod.

    Args:
      mod: Dotted name of the module to start from.
      imports: Dict mapping a dotted module name to the names it imports.

    Raises:
      CircularImport: carrying the chain of module names, when following
        imports from mod eventually reaches mod again.
    """
    # Names that are neither stdlib nor 'mercurial.'-qualified are taken to
    # be siblings of mod, so they get mod's package prepended.
    package = mod.rsplit('.', 1)[0]
    shortest = {}
    pending = [[mod]]
    while pending:
        chain = pending.pop(0)
        depth = len(chain)
        for name in sorted(imports.get(chain[-1], [])):
            if not (name in stdlib_modules or name.startswith('mercurial.')):
                name = package + '.' + name
            # Only follow a module when reached by a strictly shorter chain
            # than any seen so far; this is what bounds the search.
            if depth >= shortest.get(name, 1000):
                continue
            shortest[name] = depth
            if name not in chain:
                pending.append(chain + [name])
            elif name == chain[0]:
                raise CircularImport(chain)
180
181
def rotatecycle(cycle):
    """Rotate a cycle so the lexicographically smallest module comes first.

    The smallest module is also appended at the end, closing the loop.

    >>> rotatecycle(['foo', 'bar'])
    ['bar', 'foo', 'bar']
    """
    pivot = cycle.index(min(cycle))
    head = cycle[pivot:]
    tail = cycle[:pivot]
    return head + tail + [head[0]]
190
191
def find_cycles(imports):
    """Find cycles in an already-loaded import graph.

    Args:
      imports: Dict mapping a dotted module name to the names it imports.

    Returns:
      A set of strings, one per distinct cycle, each of the form
      'a -> b -> a' and rotated so the smallest module name comes first.

    >>> imports = {'top.foo': ['bar', 'os.path', 'qux'],
    ...            'top.bar': ['baz', 'sys'],
    ...            'top.baz': ['foo'],
    ...            'top.qux': ['foo']}
    >>> print('\\n'.join(sorted(find_cycles(imports))))
    top.bar -> top.baz -> top.foo -> top.bar
    top.foo -> top.qux -> top.foo
    """
    cycles = set()
    # Iterating a dict yields its keys; sorted() keeps the visiting order
    # deterministic.
    for mod in sorted(imports):
        try:
            checkmod(mod, imports)
        except CircularImport as e:
            cycle = e.args[0]
            # Rotation makes the same cycle found from different starting
            # modules collapse into one set entry.
            cycles.add(" -> ".join(rotatecycle(cycle)))
    return cycles
210
211
211 def _cycle_sortkey(c):
212 def _cycle_sortkey(c):
212 return len(c), c
213 return len(c), c
213
214
def main(argv):
    """Check the given source files for import problems.

    Reports, on stdout, import statements that mix stdlib and local
    modules, and import cycles between the given files.

    Args:
      argv: Command line as a list: program name followed by one or more
            paths of python source files.

    Returns:
      An integer suitable as a process exit status: 1 when the command
      line is unusable or when any problem was reported, 0 otherwise.
    """
    if len(argv) < 2:
        prog = argv[0] if argv else 'import-checker'
        print('Usage: %s file [file] [file] ...' % prog)
        return 1
    used_imports = {}
    any_errors = False
    for source_path in argv[1:]:
        # "with" closes the file even if parsing the source raises later.
        with open(source_path) as f:
            src = f.read()
        modname = dotted_name_of_path(source_path, trimpure=True)
        used_imports[modname] = sorted(
            imported_modules(src, ignore_nested=True))
        for error in verify_stdlib_on_own_line(src):
            any_errors = True
            print('%s %s' % (source_path, error))
    firstmods = set()
    for c in sorted(find_cycles(used_imports), key=_cycle_sortkey):
        first = c.split()[0]
        # As a rough cut, ignore any cycle that starts with the
        # same module as some other cycle. Otherwise we see lots
        # of cycles that are effectively duplicates.
        if first in firstmods:
            continue
        print('Import cycle: %s' % c)
        firstmods.add(first)
        any_errors = True
    # Non-zero means failure, matching the usage-error path above.
    return int(any_errors)
244
245
if __name__ == '__main__':
    # Coerce main()'s result to an integer for use as the exit status.
    status = int(main(sys.argv))
    sys.exit(status)
General Comments 0
You need to be logged in to leave comments. Login now