##// END OF EJS Templates
import-checker: ignore nested imports
Augie Fackler -
r20037:957b4337 default
parent child Browse files
Show More
@@ -1,195 +1,212
1 import ast
1 import ast
2 import os
2 import os
3 import sys
3 import sys
4
4
5 def dotted_name_of_path(path):
5 def dotted_name_of_path(path):
6 """Given a relative path to a source file, return its dotted module name.
6 """Given a relative path to a source file, return its dotted module name.
7
7
8
8
9 >>> dotted_name_of_path('mercurial/error.py')
9 >>> dotted_name_of_path('mercurial/error.py')
10 'mercurial.error'
10 'mercurial.error'
11 """
11 """
12 parts = path.split('/')
12 parts = path.split('/')
13 parts[-1] = parts[-1][:-3] # remove .py
13 parts[-1] = parts[-1][:-3] # remove .py
14 return '.'.join(parts)
14 return '.'.join(parts)
15
15
16
16
17 def list_stdlib_modules():
17 def list_stdlib_modules():
18 """List the modules present in the stdlib.
18 """List the modules present in the stdlib.
19
19
20 >>> mods = set(list_stdlib_modules())
20 >>> mods = set(list_stdlib_modules())
21 >>> 'BaseHTTPServer' in mods
21 >>> 'BaseHTTPServer' in mods
22 True
22 True
23
23
24 os.path isn't really a module, so it's missing:
24 os.path isn't really a module, so it's missing:
25
25
26 >>> 'os.path' in mods
26 >>> 'os.path' in mods
27 False
27 False
28
28
29 sys requires special treatment, because it's baked into the
29 sys requires special treatment, because it's baked into the
30 interpreter, but it should still appear:
30 interpreter, but it should still appear:
31
31
32 >>> 'sys' in mods
32 >>> 'sys' in mods
33 True
33 True
34
34
35 >>> 'collections' in mods
35 >>> 'collections' in mods
36 True
36 True
37
37
38 >>> 'cStringIO' in mods
38 >>> 'cStringIO' in mods
39 True
39 True
40 """
40 """
41 for m in sys.builtin_module_names:
41 for m in sys.builtin_module_names:
42 yield m
42 yield m
43 # These modules only exist on windows, but we should always
43 # These modules only exist on windows, but we should always
44 # consider them stdlib.
44 # consider them stdlib.
45 for m in ['msvcrt', '_winreg']:
45 for m in ['msvcrt', '_winreg']:
46 yield m
46 yield m
47 # These get missed too
47 # These get missed too
48 for m in 'ctypes', 'email':
48 for m in 'ctypes', 'email':
49 yield m
49 yield m
50 yield 'builtins' # python3 only
50 yield 'builtins' # python3 only
51 for libpath in sys.path:
51 for libpath in sys.path:
52 # We want to walk everything in sys.path that starts with
52 # We want to walk everything in sys.path that starts with
53 # either sys.prefix or sys.exec_prefix.
53 # either sys.prefix or sys.exec_prefix.
54 if not (libpath.startswith(sys.prefix)
54 if not (libpath.startswith(sys.prefix)
55 or libpath.startswith(sys.exec_prefix)):
55 or libpath.startswith(sys.exec_prefix)):
56 continue
56 continue
57 if 'site-packages' in libpath:
57 if 'site-packages' in libpath:
58 continue
58 continue
59 for top, dirs, files in os.walk(libpath):
59 for top, dirs, files in os.walk(libpath):
60 for name in files:
60 for name in files:
61 if name == '__init__.py':
61 if name == '__init__.py':
62 continue
62 continue
63 if not (name.endswith('.py') or name.endswith('.so')):
63 if not (name.endswith('.py') or name.endswith('.so')):
64 continue
64 continue
65 full_path = os.path.join(top, name)
65 full_path = os.path.join(top, name)
66 if 'site-packages' in full_path:
66 if 'site-packages' in full_path:
67 continue
67 continue
68 rel_path = full_path[len(libpath) + 1:]
68 rel_path = full_path[len(libpath) + 1:]
69 mod = dotted_name_of_path(rel_path)
69 mod = dotted_name_of_path(rel_path)
70 yield mod
70 yield mod
71
71
72 stdlib_modules = set(list_stdlib_modules())
72 stdlib_modules = set(list_stdlib_modules())
73
73
74 def imported_modules(source):
74 def imported_modules(source, ignore_nested=False):
75 """Given the source of a file as a string, yield the names
75 """Given the source of a file as a string, yield the names
76 imported by that file.
76 imported by that file.
77
77
78 >>> list(imported_modules(
78 Args:
79 source: The python source to examine as a string.
80 ignore_nested: If true, import statements that do not start in
81 column zero will be ignored.
82
83 Returns:
84 A list of module names imported by the given source.
85
86 >>> sorted(imported_modules(
79 ... 'import foo ; from baz import bar; import foo.qux'))
87 ... 'import foo ; from baz import bar; import foo.qux'))
80 ['foo', 'baz.bar', 'foo.qux']
88 ['baz.bar', 'foo', 'foo.qux']
89 >>> sorted(imported_modules(
90 ... '''import foo
91 ... def wat():
92 ... import bar
93 ... ''', ignore_nested=True))
94 ['foo']
81 """
95 """
82 for node in ast.walk(ast.parse(source)):
96 for node in ast.walk(ast.parse(source)):
97 if ignore_nested and getattr(node, 'col_offset', 0) > 0:
98 continue
83 if isinstance(node, ast.Import):
99 if isinstance(node, ast.Import):
84 for n in node.names:
100 for n in node.names:
85 yield n.name
101 yield n.name
86 elif isinstance(node, ast.ImportFrom):
102 elif isinstance(node, ast.ImportFrom):
87 prefix = node.module + '.'
103 prefix = node.module + '.'
88 for n in node.names:
104 for n in node.names:
89 yield prefix + n.name
105 yield prefix + n.name
90
106
91 def verify_stdlib_on_own_line(source):
107 def verify_stdlib_on_own_line(source):
92 """Given some python source, verify that stdlib imports are done
108 """Given some python source, verify that stdlib imports are done
93 in separate statements from relative local module imports.
109 in separate statements from relative local module imports.
94
110
95 Observing this limitation is important as it works around an
111 Observing this limitation is important as it works around an
96 annoying lib2to3 bug in relative import rewrites:
112 annoying lib2to3 bug in relative import rewrites:
97 http://bugs.python.org/issue19510.
113 http://bugs.python.org/issue19510.
98
114
99 >>> list(verify_stdlib_on_own_line('import sys, foo'))
115 >>> list(verify_stdlib_on_own_line('import sys, foo'))
100 ['mixed stdlib and relative imports:\\n foo, sys']
116 ['mixed stdlib and relative imports:\\n foo, sys']
101 >>> list(verify_stdlib_on_own_line('import sys, os'))
117 >>> list(verify_stdlib_on_own_line('import sys, os'))
102 []
118 []
103 >>> list(verify_stdlib_on_own_line('import foo, bar'))
119 >>> list(verify_stdlib_on_own_line('import foo, bar'))
104 []
120 []
105 """
121 """
106 for node in ast.walk(ast.parse(source)):
122 for node in ast.walk(ast.parse(source)):
107 if isinstance(node, ast.Import):
123 if isinstance(node, ast.Import):
108 from_stdlib = {}
124 from_stdlib = {}
109 for n in node.names:
125 for n in node.names:
110 from_stdlib[n.name] = n.name in stdlib_modules
126 from_stdlib[n.name] = n.name in stdlib_modules
111 num_std = len([x for x in from_stdlib.values() if x])
127 num_std = len([x for x in from_stdlib.values() if x])
112 if num_std not in (len(from_stdlib.values()), 0):
128 if num_std not in (len(from_stdlib.values()), 0):
113 yield ('mixed stdlib and relative imports:\n %s' %
129 yield ('mixed stdlib and relative imports:\n %s' %
114 ', '.join(sorted(from_stdlib.iterkeys())))
130 ', '.join(sorted(from_stdlib.iterkeys())))
115
131
116 class CircularImport(Exception):
132 class CircularImport(Exception):
117 pass
133 pass
118
134
119
135
120 def cyclekey(names):
136 def cyclekey(names):
121 return tuple(sorted(set(names)))
137 return tuple(sorted(set(names)))
122
138
123 def check_one_mod(mod, imports, path=None, ignore=None):
139 def check_one_mod(mod, imports, path=None, ignore=None):
124 if path is None:
140 if path is None:
125 path = []
141 path = []
126 if ignore is None:
142 if ignore is None:
127 ignore = []
143 ignore = []
128 path = path + [mod]
144 path = path + [mod]
129 for i in sorted(imports.get(mod, [])):
145 for i in sorted(imports.get(mod, [])):
130 if i not in stdlib_modules:
146 if i not in stdlib_modules:
131 i = mod.rsplit('.', 1)[0] + '.' + i
147 i = mod.rsplit('.', 1)[0] + '.' + i
132 if i in path:
148 if i in path:
133 firstspot = path.index(i)
149 firstspot = path.index(i)
134 cycle = path[firstspot:] + [i]
150 cycle = path[firstspot:] + [i]
135 if cyclekey(cycle) not in ignore:
151 if cyclekey(cycle) not in ignore:
136 raise CircularImport(cycle)
152 raise CircularImport(cycle)
137 continue
153 continue
138 check_one_mod(i, imports, path=path, ignore=ignore)
154 check_one_mod(i, imports, path=path, ignore=ignore)
139
155
140
156
141 def find_cycles(imports):
157 def find_cycles(imports):
142 """Find cycles in an already-loaded import graph.
158 """Find cycles in an already-loaded import graph.
143
159
144 >>> imports = {'top.foo': ['bar', 'os.path', 'qux'],
160 >>> imports = {'top.foo': ['bar', 'os.path', 'qux'],
145 ... 'top.bar': ['baz', 'sys'],
161 ... 'top.bar': ['baz', 'sys'],
146 ... 'top.baz': ['foo'],
162 ... 'top.baz': ['foo'],
147 ... 'top.qux': ['foo']}
163 ... 'top.qux': ['foo']}
148 >>> print '\\n'.join(sorted(find_cycles(imports)))
164 >>> print '\\n'.join(sorted(find_cycles(imports)))
149 top.bar -> top.baz -> top.foo -> top.bar
165 top.bar -> top.baz -> top.foo -> top.bar
150 top.foo -> top.qux -> top.foo
166 top.foo -> top.qux -> top.foo
151 """
167 """
152 cycles = {}
168 cycles = {}
153 for mod in sorted(imports.iterkeys()):
169 for mod in sorted(imports.iterkeys()):
154 try:
170 try:
155 check_one_mod(mod, imports, ignore=cycles)
171 check_one_mod(mod, imports, ignore=cycles)
156 except CircularImport, e:
172 except CircularImport, e:
157 cycle = e.args[0]
173 cycle = e.args[0]
158 cycles[cyclekey(cycle)] = ' -> '.join(rotatecycle(cycle))
174 cycles[cyclekey(cycle)] = ' -> '.join(rotatecycle(cycle))
159 return cycles.values()
175 return cycles.values()
160
176
161 def _cycle_sortkey(c):
177 def _cycle_sortkey(c):
162 return len(c), c
178 return len(c), c
163
179
164 def main(argv):
180 def main(argv):
165 if len(argv) < 2:
181 if len(argv) < 2:
166 print 'Usage: %s file [file] [file] ...'
182 print 'Usage: %s file [file] [file] ...'
167 return 1
183 return 1
168 used_imports = {}
184 used_imports = {}
169 any_errors = False
185 any_errors = False
170 for source_path in argv[1:]:
186 for source_path in argv[1:]:
171 f = open(source_path)
187 f = open(source_path)
172 modname = dotted_name_of_path(source_path)
188 modname = dotted_name_of_path(source_path)
173 src = f.read()
189 src = f.read()
174 used_imports[modname] = sorted(imported_modules(src))
190 used_imports[modname] = sorted(
191 imported_modules(src, ignore_nested=True))
175 for error in verify_stdlib_on_own_line(src):
192 for error in verify_stdlib_on_own_line(src):
176 any_errors = True
193 any_errors = True
177 print source_path, error
194 print source_path, error
178 f.close()
195 f.close()
179 cycles = find_cycles(used_imports)
196 cycles = find_cycles(used_imports)
180 if cycles:
197 if cycles:
181 firstmods = set()
198 firstmods = set()
182 for c in sorted(cycles, key=_cycle_sortkey):
199 for c in sorted(cycles, key=_cycle_sortkey):
183 first = c.split()[0]
200 first = c.split()[0]
184 # As a rough cut, ignore any cycle that starts with the
201 # As a rough cut, ignore any cycle that starts with the
185 # same module as some other cycle. Otherwise we see lots
202 # same module as some other cycle. Otherwise we see lots
186 # of cycles that are effectively duplicates.
203 # of cycles that are effectively duplicates.
187 if first in firstmods:
204 if first in firstmods:
188 continue
205 continue
189 print 'Import cycle:', c
206 print 'Import cycle:', c
190 firstmods.add(first)
207 firstmods.add(first)
191 any_errors = True
208 any_errors = True
192 return not any_errors
209 return not any_errors
193
210
194 if __name__ == '__main__':
211 if __name__ == '__main__':
195 sys.exit(int(main(sys.argv)))
212 sys.exit(int(main(sys.argv)))
General Comments 0
You need to be logged in to leave comments. Login now