##// END OF EJS Templates
import-checker: force 'fcntl', 'grp', 'pwd', and 'termios' to stdlib modules...
Matt Harbison -
r24669:fbdbff1b default
parent child Browse files
Show More
@@ -1,247 +1,249 b''
1 import ast
1 import ast
2 import os
2 import os
3 import sys
3 import sys
4
4
5 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
5 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
6 # to work when run from a virtualenv. The modules were chosen empirically
6 # to work when run from a virtualenv. The modules were chosen empirically
7 # so that the return value matches the return value without virtualenv.
7 # so that the return value matches the return value without virtualenv.
8 import BaseHTTPServer
8 import BaseHTTPServer
9 import zlib
9 import zlib
10
10
def dotted_name_of_path(path, trimpure=False):
    """Given a relative path to a source file, return its dotted module name.

    Both '/' and the platform's native separator (os.sep) are accepted
    between path components, so paths assembled with os.path.join() or
    produced by os.walk() are handled on every platform.

    Args:
      path: Relative path of a .py, .so, .ARCH.so (or similar) file.
      trimpure: If true, every path component named 'pure' is dropped
        from the result.

    Returns:
      The dotted module name as a string.

    >>> dotted_name_of_path('mercurial/error.py')
    'mercurial.error'
    >>> dotted_name_of_path('mercurial/pure/parsers.py', trimpure=True)
    'mercurial.parsers'
    >>> dotted_name_of_path('zlibmodule.so')
    'zlib'
    >>> dotted_name_of_path('pkg/module.py')
    'pkg.module'
    """
    # Normalize the native separator first; on POSIX this is a no-op.
    parts = path.replace(os.sep, '/').split('/')
    parts[-1] = parts[-1].split('.', 1)[0] # remove .py and .so and .ARCH.so
    basename = parts[-1]
    # C extensions are commonly built as '<name>module.so'; strip that
    # suffix, but never reduce a file literally named 'module' to an
    # empty component (which would produce a name ending in '.').
    if basename.endswith('module') and basename != 'module':
        parts[-1] = basename[:-6]
    if trimpure:
        parts = [p for p in parts if p != 'pure']
    return '.'.join(parts)
28
28
29
29
def list_stdlib_modules():
    """Generate the names of the modules that make up the stdlib.

    The names come from three places, in this order: the interpreter's
    built-in module table, a fixed list of platform-specific or otherwise
    missed modules, and a walk over every sys.path entry that lives under
    a stdlib prefix (skipping anything inside site-packages).

    >>> mods = set(list_stdlib_modules())
    >>> 'BaseHTTPServer' in mods
    True

    os.path isn't really a module, so it's missing:

    >>> 'os.path' in mods
    False

    sys requires special treatment, because it's baked into the
    interpreter, but it should still appear:

    >>> 'sys' in mods
    True

    >>> 'collections' in mods
    True

    >>> 'cStringIO' in mods
    True
    """
    for builtin in sys.builtin_module_names:
        yield builtin
    always_stdlib = (
        # These modules only exist on windows, but we should always
        # consider them stdlib.
        'msvcrt', '_winreg',
        # These get missed too
        'ctypes', 'email',
        'builtins', # python3 only
        'fcntl', 'grp', 'pwd', 'termios', # Unix only
    )
    for extra in always_stdlib:
        yield extra
    roots = set([sys.prefix, sys.exec_prefix])
    # Inside a virtualenv the two prefixes above do not cover the real
    # stdlib, so add the directories of known stdlib modules when they
    # are not already below one of the roots.
    for probe in (BaseHTTPServer, zlib):
        try:
            # Not all module objects have a __file__ attribute.
            probe_file = probe.__file__
        except AttributeError:
            continue
        probe_dir = os.path.dirname(probe_file)
        covering = [r for r in roots if probe_dir.startswith(r)]
        if not covering:
            roots.add(probe_dir)
    for libpath in sys.path:
        if 'site-packages' in libpath:
            continue
        # Only walk sys.path entries that start with one of the roots.
        # check-code suppressed because the ast module used by this
        # script implies the availability of any().
        if not any(libpath.startswith(r) for r in roots): # no-py24
            continue
        # Number of leading characters (libpath plus one separator) to
        # drop from each full path to obtain the relative path.
        skip = len(libpath) + 1
        for top, dirs, files in os.walk(libpath):
            for name in files:
                if name == '__init__.py':
                    continue
                if not name.endswith(('.py', '.so', '.pyd')):
                    continue
                full_path = os.path.join(top, name)
                if 'site-packages' in full_path:
                    continue
                yield dotted_name_of_path(full_path[skip:])
102
104
# Computed once at import time; verify_stdlib_on_own_line() and checkmod()
# test imported names for membership in this set.
stdlib_modules = set(list_stdlib_modules())
104
106
def imported_modules(source, ignore_nested=False):
    """Given the source of a file as a string, yield the names
    imported by that file.

    Args:
      source: The python source to examine as a string.
      ignore_nested: If true, import statements that do not start in
        column zero will be ignored.

    Returns:
      A generator of imported names. 'import a.b' yields 'a.b' and
      'from a import b' yields 'a.b'. A 'from . import b' statement has
      no module part and yields just 'b'.

    Raises:
      SyntaxError: if source cannot be parsed (propagated from ast.parse).

    >>> sorted(imported_modules(
    ...     'import foo ; from baz import bar; import foo.qux'))
    ['baz.bar', 'foo', 'foo.qux']
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', ignore_nested=True))
    ['foo']
    >>> sorted(imported_modules('from . import foo'))
    ['foo']
    """
    for node in ast.walk(ast.parse(source)):
        # Nodes without a col_offset (e.g. the Module node) are treated
        # as top-level.
        if ignore_nested and getattr(node, 'col_offset', 0) > 0:
            continue
        if isinstance(node, ast.Import):
            for n in node.names:
                yield n.name
        elif isinstance(node, ast.ImportFrom):
            # node.module is None for 'from . import x'; concatenating it
            # with '.' would raise TypeError.
            if node.module:
                prefix = node.module + '.'
            else:
                prefix = ''
            for n in node.names:
                yield prefix + n.name
137
139
def verify_stdlib_on_own_line(source):
    """Yield an error for each import statement mixing stdlib and local names.

    A single 'import a, b' statement must not name both stdlib modules
    and relative local modules. Keeping them apart works around an
    annoying lib2to3 bug in relative import rewrites:
    http://bugs.python.org/issue19510.

    >>> list(verify_stdlib_on_own_line('import sys, foo'))
    ['mixed imports\\n stdlib: sys\\n relative: foo']
    >>> list(verify_stdlib_on_own_line('import sys, os'))
    []
    >>> list(verify_stdlib_on_own_line('import foo, bar'))
    []
    """
    for node in ast.walk(ast.parse(source)):
        if not isinstance(node, ast.Import):
            continue
        names = [alias.name for alias in node.names]
        stdlib = sorted(n for n in names if n in stdlib_modules)
        relative = sorted(n for n in names if n not in stdlib_modules)
        # Only a statement that names both kinds is an error.
        if stdlib and relative:
            yield ('mixed imports\n stdlib: %s\n relative: %s' %
                   (', '.join(stdlib), ', '.join(relative)))
162
164
class CircularImport(Exception):
    """Raised by checkmod() when a module's imports lead back to itself.

    The import path that closes the cycle (a list of module names) is
    passed as the sole argument, so handlers read it from args[0].
    """
165
167
def checkmod(mod, imports):
    """Search breadth-first from mod for an import chain returning to mod.

    Args:
      mod: Dotted name of the module to start from.
      imports: Dict mapping a dotted module name to the names it imports.
        Names that are neither stdlib nor start with 'mercurial.' are
        taken to be relative to mod's package.

    Raises:
      CircularImport: with the chain of module names (starting at mod)
        whose last entry imports mod again.
    """
    best_depth = {}
    queue = [[mod]]
    while queue:
        chain = queue.pop(0)
        depth = len(chain)
        for name in sorted(imports.get(chain[-1], [])):
            if not (name in stdlib_modules or name.startswith('mercurial.')):
                name = mod.rsplit('.', 1)[0] + '.' + name
            # Skip names already reached by a chain at least this short.
            if depth >= best_depth.get(name, 1000):
                continue
            best_depth[name] = depth
            if name not in chain:
                queue.append(chain + [name])
            elif name == chain[0]:
                raise CircularImport(chain)
181
183
def rotatecycle(cycle):
    """Rotate cycle so its lexicographically smallest module comes first.

    The smallest module is repeated at the end so that the returned list
    reads as a closed loop.

    >>> rotatecycle(['foo', 'bar'])
    ['bar', 'foo', 'bar']
    """
    start = cycle.index(min(cycle))
    rotated = cycle[start:] + cycle[:start]
    return rotated + [cycle[start]]
191
193
def find_cycles(imports):
    """Find cycles in an already-loaded import graph.

    Args:
      imports: Dict mapping a dotted module name to the list of names
        that module imports.

    Returns:
      A set of strings, one per distinct cycle, each of the form
      'a -> b -> a' with the lexicographically first module listed first.

    >>> imports = {'top.foo': ['bar', 'os.path', 'qux'],
    ...            'top.bar': ['baz', 'sys'],
    ...            'top.baz': ['foo'],
    ...            'top.qux': ['foo']}
    >>> print('\\n'.join(sorted(find_cycles(imports))))
    top.bar -> top.baz -> top.foo -> top.bar
    top.foo -> top.qux -> top.foo
    """
    cycles = set()
    # Iterating a dict yields its keys on both Python 2 and 3.
    for mod in sorted(imports):
        try:
            checkmod(mod, imports)
        except CircularImport as e:
            cycle = e.args[0]
            # Rotating to a canonical start makes the same cycle found
            # from different modules collapse to one set entry.
            cycles.add(" -> ".join(rotatecycle(cycle)))
    return cycles
211
213
212 def _cycle_sortkey(c):
214 def _cycle_sortkey(c):
213 return len(c), c
215 return len(c), c
214
216
def main(argv):
    """Check the source files named in argv[1:] for import problems.

    Two checks are made: stdlib and relative imports mixed in a single
    import statement, and import cycles between the given files. Each
    problem found is printed to stdout.

    Args:
      argv: Command line; argv[0] is the program name and the remaining
        entries are paths of python source files.

    Returns:
      1 if no files were given. Otherwise True if any problem was
      reported and False if none was, so that int() of the result is a
      conventional exit status (non-zero means failure).
    """
    if len(argv) < 2:
        if argv:
            prog = argv[0]
        else:
            prog = sys.argv[0]
        print('Usage: %s file [file] [file] ...' % prog)
        return 1
    used_imports = {}
    any_errors = False
    for source_path in argv[1:]:
        # Read the whole file up front so the handle is closed even if
        # parsing the source raises.
        with open(source_path) as f:
            src = f.read()
        modname = dotted_name_of_path(source_path, trimpure=True)
        used_imports[modname] = sorted(
            imported_modules(src, ignore_nested=True))
        for error in verify_stdlib_on_own_line(src):
            any_errors = True
            print('%s %s' % (source_path, error))
    cycles = find_cycles(used_imports)
    if cycles:
        firstmods = set()
        for c in sorted(cycles, key=_cycle_sortkey):
            first = c.split()[0]
            # As a rough cut, ignore any cycle that starts with the
            # same module as some other cycle. Otherwise we see lots
            # of cycles that are effectively duplicates.
            if first in firstmods:
                continue
            print('Import cycle: %s' % c)
            firstmods.add(first)
            any_errors = True
    # Truthy means failure, consistent with the usage error path above.
    return any_errors
245
247
if __name__ == '__main__':
    # main() returns either an int or a bool; coerce it to an int before
    # handing it to sys.exit() as the process exit status.
    exit_status = int(main(sys.argv))
    sys.exit(exit_status)
General Comments 0
You need to be logged in to leave comments. Login now