##// END OF EJS Templates
import-checker: refactor sys.path prefix check (issue4129)...
Chris Jerdonek -
r20197:761f2929 default
parent child Browse files
Show More
@@ -1,221 +1,224
1 import ast
1 import ast
2 import os
2 import os
3 import sys
3 import sys
4
4
5 def dotted_name_of_path(path):
5 def dotted_name_of_path(path):
6 """Given a relative path to a source file, return its dotted module name.
6 """Given a relative path to a source file, return its dotted module name.
7
7
8
8
9 >>> dotted_name_of_path('mercurial/error.py')
9 >>> dotted_name_of_path('mercurial/error.py')
10 'mercurial.error'
10 'mercurial.error'
11 """
11 """
12 parts = path.split('/')
12 parts = path.split('/')
13 parts[-1] = parts[-1][:-3] # remove .py
13 parts[-1] = parts[-1][:-3] # remove .py
14 return '.'.join(parts)
14 return '.'.join(parts)
15
15
16
16
17 def list_stdlib_modules():
17 def list_stdlib_modules():
18 """List the modules present in the stdlib.
18 """List the modules present in the stdlib.
19
19
20 >>> mods = set(list_stdlib_modules())
20 >>> mods = set(list_stdlib_modules())
21 >>> 'BaseHTTPServer' in mods
21 >>> 'BaseHTTPServer' in mods
22 True
22 True
23
23
24 os.path isn't really a module, so it's missing:
24 os.path isn't really a module, so it's missing:
25
25
26 >>> 'os.path' in mods
26 >>> 'os.path' in mods
27 False
27 False
28
28
29 sys requires special treatment, because it's baked into the
29 sys requires special treatment, because it's baked into the
30 interpreter, but it should still appear:
30 interpreter, but it should still appear:
31
31
32 >>> 'sys' in mods
32 >>> 'sys' in mods
33 True
33 True
34
34
35 >>> 'collections' in mods
35 >>> 'collections' in mods
36 True
36 True
37
37
38 >>> 'cStringIO' in mods
38 >>> 'cStringIO' in mods
39 True
39 True
40 """
40 """
41 for m in sys.builtin_module_names:
41 for m in sys.builtin_module_names:
42 yield m
42 yield m
43 # These modules only exist on Windows, but we should always
43 # These modules only exist on Windows, but we should always
44 # consider them stdlib.
44 # consider them stdlib.
45 for m in ['msvcrt', '_winreg']:
45 for m in ['msvcrt', '_winreg']:
46 yield m
46 yield m
47 # These get missed too
47 # These get missed too
48 for m in 'ctypes', 'email':
48 for m in 'ctypes', 'email':
49 yield m
49 yield m
50 yield 'builtins' # python3 only
50 yield 'builtins' # python3 only
51 stdlib_prefixes = set([sys.prefix, sys.exec_prefix])
51 for libpath in sys.path:
52 for libpath in sys.path:
52 # We want to walk everything in sys.path that starts with
53 # We want to walk everything in sys.path that starts with something
53 # either sys.prefix or sys.exec_prefix.
54 # in stdlib_prefixes.
54 if not (libpath.startswith(sys.prefix)
55 for prefix in stdlib_prefixes:
55 or libpath.startswith(sys.exec_prefix)):
56 if libpath.startswith(prefix):
57 break
58 else:
56 continue
59 continue
57 if 'site-packages' in libpath:
60 if 'site-packages' in libpath:
58 continue
61 continue
59 for top, dirs, files in os.walk(libpath):
62 for top, dirs, files in os.walk(libpath):
60 for name in files:
63 for name in files:
61 if name == '__init__.py':
64 if name == '__init__.py':
62 continue
65 continue
63 if not (name.endswith('.py') or name.endswith('.so')):
66 if not (name.endswith('.py') or name.endswith('.so')):
64 continue
67 continue
65 full_path = os.path.join(top, name)
68 full_path = os.path.join(top, name)
66 if 'site-packages' in full_path:
69 if 'site-packages' in full_path:
67 continue
70 continue
68 rel_path = full_path[len(libpath) + 1:]
71 rel_path = full_path[len(libpath) + 1:]
69 mod = dotted_name_of_path(rel_path)
72 mod = dotted_name_of_path(rel_path)
70 yield mod
73 yield mod
71
74
72 stdlib_modules = set(list_stdlib_modules())
75 stdlib_modules = set(list_stdlib_modules())
73
76
74 def imported_modules(source, ignore_nested=False):
77 def imported_modules(source, ignore_nested=False):
75 """Given the source of a file as a string, yield the names
78 """Given the source of a file as a string, yield the names
76 imported by that file.
79 imported by that file.
77
80
78 Args:
81 Args:
79 source: The python source to examine as a string.
82 source: The python source to examine as a string.
80 ignore_nested: If true, import statements that do not start in
83 ignore_nested: If true, import statements that do not start in
81 column zero will be ignored.
84 column zero will be ignored.
82
85
83 Returns:
86 Returns:
84 A generator yielding the module names imported by the given source.
87 A generator yielding the module names imported by the given source.
85
88
86 >>> sorted(imported_modules(
89 >>> sorted(imported_modules(
87 ... 'import foo ; from baz import bar; import foo.qux'))
90 ... 'import foo ; from baz import bar; import foo.qux'))
88 ['baz.bar', 'foo', 'foo.qux']
91 ['baz.bar', 'foo', 'foo.qux']
89 >>> sorted(imported_modules(
92 >>> sorted(imported_modules(
90 ... '''import foo
93 ... '''import foo
91 ... def wat():
94 ... def wat():
92 ... import bar
95 ... import bar
93 ... ''', ignore_nested=True))
96 ... ''', ignore_nested=True))
94 ['foo']
97 ['foo']
95 """
98 """
96 for node in ast.walk(ast.parse(source)):
99 for node in ast.walk(ast.parse(source)):
97 if ignore_nested and getattr(node, 'col_offset', 0) > 0:
100 if ignore_nested and getattr(node, 'col_offset', 0) > 0:
98 continue
101 continue
99 if isinstance(node, ast.Import):
102 if isinstance(node, ast.Import):
100 for n in node.names:
103 for n in node.names:
101 yield n.name
104 yield n.name
102 elif isinstance(node, ast.ImportFrom):
105 elif isinstance(node, ast.ImportFrom):
103 prefix = node.module + '.'
106 prefix = node.module + '.'
104 for n in node.names:
107 for n in node.names:
105 yield prefix + n.name
108 yield prefix + n.name
106
109
107 def verify_stdlib_on_own_line(source):
110 def verify_stdlib_on_own_line(source):
108 """Given some python source, verify that stdlib imports are done
111 """Given some python source, verify that stdlib imports are done
109 in separate statements from relative local module imports.
112 in separate statements from relative local module imports.
110
113
111 Observing this limitation is important as it works around an
114 Observing this limitation is important as it works around an
112 annoying lib2to3 bug in relative import rewrites:
115 annoying lib2to3 bug in relative import rewrites:
113 http://bugs.python.org/issue19510.
116 http://bugs.python.org/issue19510.
114
117
115 >>> list(verify_stdlib_on_own_line('import sys, foo'))
118 >>> list(verify_stdlib_on_own_line('import sys, foo'))
116 ['mixed stdlib and relative imports:\\n foo, sys']
119 ['mixed stdlib and relative imports:\\n foo, sys']
117 >>> list(verify_stdlib_on_own_line('import sys, os'))
120 >>> list(verify_stdlib_on_own_line('import sys, os'))
118 []
121 []
119 >>> list(verify_stdlib_on_own_line('import foo, bar'))
122 >>> list(verify_stdlib_on_own_line('import foo, bar'))
120 []
123 []
121 """
124 """
122 for node in ast.walk(ast.parse(source)):
125 for node in ast.walk(ast.parse(source)):
123 if isinstance(node, ast.Import):
126 if isinstance(node, ast.Import):
124 from_stdlib = {}
127 from_stdlib = {}
125 for n in node.names:
128 for n in node.names:
126 from_stdlib[n.name] = n.name in stdlib_modules
129 from_stdlib[n.name] = n.name in stdlib_modules
127 num_std = len([x for x in from_stdlib.values() if x])
130 num_std = len([x for x in from_stdlib.values() if x])
128 if num_std not in (len(from_stdlib.values()), 0):
131 if num_std not in (len(from_stdlib.values()), 0):
129 yield ('mixed stdlib and relative imports:\n %s' %
132 yield ('mixed stdlib and relative imports:\n %s' %
130 ', '.join(sorted(from_stdlib.iterkeys())))
133 ', '.join(sorted(from_stdlib.iterkeys())))
131
134
132 class CircularImport(Exception):
135 class CircularImport(Exception):
133 pass
136 pass
134
137
135
138
136 def cyclekey(names):
139 def cyclekey(names):
137 return tuple(sorted(set(names)))
140 return tuple(sorted(set(names)))
138
141
139 def check_one_mod(mod, imports, path=None, ignore=None):
142 def check_one_mod(mod, imports, path=None, ignore=None):
140 if path is None:
143 if path is None:
141 path = []
144 path = []
142 if ignore is None:
145 if ignore is None:
143 ignore = []
146 ignore = []
144 path = path + [mod]
147 path = path + [mod]
145 for i in sorted(imports.get(mod, [])):
148 for i in sorted(imports.get(mod, [])):
146 if i not in stdlib_modules:
149 if i not in stdlib_modules:
147 i = mod.rsplit('.', 1)[0] + '.' + i
150 i = mod.rsplit('.', 1)[0] + '.' + i
148 if i in path:
151 if i in path:
149 firstspot = path.index(i)
152 firstspot = path.index(i)
150 cycle = path[firstspot:] + [i]
153 cycle = path[firstspot:] + [i]
151 if cyclekey(cycle) not in ignore:
154 if cyclekey(cycle) not in ignore:
152 raise CircularImport(cycle)
155 raise CircularImport(cycle)
153 continue
156 continue
154 check_one_mod(i, imports, path=path, ignore=ignore)
157 check_one_mod(i, imports, path=path, ignore=ignore)
155
158
156 def rotatecycle(cycle):
159 def rotatecycle(cycle):
157 """arrange a cycle so that the lexicographically first module is listed first
160 """arrange a cycle so that the lexicographically first module is listed first
158
161
159 >>> rotatecycle(['foo', 'bar', 'foo'])
162 >>> rotatecycle(['foo', 'bar', 'foo'])
160 ['bar', 'foo', 'bar']
163 ['bar', 'foo', 'bar']
161 """
164 """
162 lowest = min(cycle)
165 lowest = min(cycle)
163 idx = cycle.index(lowest)
166 idx = cycle.index(lowest)
164 return cycle[idx:] + cycle[1:idx] + [lowest]
167 return cycle[idx:] + cycle[1:idx] + [lowest]
165
168
166 def find_cycles(imports):
169 def find_cycles(imports):
167 """Find cycles in an already-loaded import graph.
170 """Find cycles in an already-loaded import graph.
168
171
169 >>> imports = {'top.foo': ['bar', 'os.path', 'qux'],
172 >>> imports = {'top.foo': ['bar', 'os.path', 'qux'],
170 ... 'top.bar': ['baz', 'sys'],
173 ... 'top.bar': ['baz', 'sys'],
171 ... 'top.baz': ['foo'],
174 ... 'top.baz': ['foo'],
172 ... 'top.qux': ['foo']}
175 ... 'top.qux': ['foo']}
173 >>> print '\\n'.join(sorted(find_cycles(imports)))
176 >>> print '\\n'.join(sorted(find_cycles(imports)))
174 top.bar -> top.baz -> top.foo -> top.bar -> top.bar
177 top.bar -> top.baz -> top.foo -> top.bar -> top.bar
175 top.foo -> top.qux -> top.foo -> top.foo
178 top.foo -> top.qux -> top.foo -> top.foo
176 """
179 """
177 cycles = {}
180 cycles = {}
178 for mod in sorted(imports.iterkeys()):
181 for mod in sorted(imports.iterkeys()):
179 try:
182 try:
180 check_one_mod(mod, imports, ignore=cycles)
183 check_one_mod(mod, imports, ignore=cycles)
181 except CircularImport, e:
184 except CircularImport, e:
182 cycle = e.args[0]
185 cycle = e.args[0]
183 cycles[cyclekey(cycle)] = ' -> '.join(rotatecycle(cycle))
186 cycles[cyclekey(cycle)] = ' -> '.join(rotatecycle(cycle))
184 return cycles.values()
187 return cycles.values()
185
188
186 def _cycle_sortkey(c):
189 def _cycle_sortkey(c):
187 return len(c), c
190 return len(c), c
188
191
189 def main(argv):
192 def main(argv):
190 if len(argv) < 2:
193 if len(argv) < 2:
191 print 'Usage: %s file [file] [file] ...'
194 print 'Usage: %s file [file] [file] ...'
192 return 1
195 return 1
193 used_imports = {}
196 used_imports = {}
194 any_errors = False
197 any_errors = False
195 for source_path in argv[1:]:
198 for source_path in argv[1:]:
196 f = open(source_path)
199 f = open(source_path)
197 modname = dotted_name_of_path(source_path)
200 modname = dotted_name_of_path(source_path)
198 src = f.read()
201 src = f.read()
199 used_imports[modname] = sorted(
202 used_imports[modname] = sorted(
200 imported_modules(src, ignore_nested=True))
203 imported_modules(src, ignore_nested=True))
201 for error in verify_stdlib_on_own_line(src):
204 for error in verify_stdlib_on_own_line(src):
202 any_errors = True
205 any_errors = True
203 print source_path, error
206 print source_path, error
204 f.close()
207 f.close()
205 cycles = find_cycles(used_imports)
208 cycles = find_cycles(used_imports)
206 if cycles:
209 if cycles:
207 firstmods = set()
210 firstmods = set()
208 for c in sorted(cycles, key=_cycle_sortkey):
211 for c in sorted(cycles, key=_cycle_sortkey):
209 first = c.split()[0]
212 first = c.split()[0]
210 # As a rough cut, ignore any cycle that starts with the
213 # As a rough cut, ignore any cycle that starts with the
211 # same module as some other cycle. Otherwise we see lots
214 # same module as some other cycle. Otherwise we see lots
212 # of cycles that are effectively duplicates.
215 # of cycles that are effectively duplicates.
213 if first in firstmods:
216 if first in firstmods:
214 continue
217 continue
215 print 'Import cycle:', c
218 print 'Import cycle:', c
216 firstmods.add(first)
219 firstmods.add(first)
217 any_errors = True
220 any_errors = True
218 return not any_errors
221 return not any_errors
219
222
220 if __name__ == '__main__':
223 if __name__ == '__main__':
221 sys.exit(int(main(sys.argv)))
224 sys.exit(int(main(sys.argv)))
General Comments 0
You need to be logged in to leave comments. Login now