##// END OF EJS Templates
import-checker: refactor sys.path prefix check (issue4129)...
Chris Jerdonek -
r20197:761f2929 default
parent child Browse files
Show More
@@ -1,221 +1,224
1 1 import ast
2 2 import os
3 3 import sys
4 4
def dotted_name_of_path(path):
    """Given a relative path to a source file, return its dotted module name.

    Both '/' and the platform's native separator (os.sep) are accepted
    between path components.  Only a real file extension is removed from
    the last component, so a name without an extension is left intact.

    >>> dotted_name_of_path('mercurial/error.py')
    'mercurial.error'
    >>> dotted_name_of_path('zlib.so')
    'zlib'
    >>> dotted_name_of_path('contrib/Makefile')
    'contrib.Makefile'
    """
    parts = path.replace(os.sep, '/').split('/')
    # splitext() drops '.py' / '.so' but, unlike a blind [:-3] slice, does
    # not eat the tail of a name that has no extension at all.
    parts[-1] = os.path.splitext(parts[-1])[0]
    return '.'.join(parts)
15 15
16 16
def list_stdlib_modules():
    """List the modules present in the stdlib.

    This is a generator.  It first yields the interpreter's built-in
    module names and a few hard-coded names, then walks every sys.path
    entry that lives under sys.prefix or sys.exec_prefix (skipping
    anything in a site-packages directory) and yields the dotted name of
    each .py / .so file found there.

    >>> mods = set(list_stdlib_modules())
    >>> 'BaseHTTPServer' in mods
    True

    os.path isn't really a module, so it's missing:

    >>> 'os.path' in mods
    False

    sys requires special treatment, because it's baked into the
    interpreter, but it should still appear:

    >>> 'sys' in mods
    True

    >>> 'collections' in mods
    True

    >>> 'cStringIO' in mods
    True
    """
    for m in sys.builtin_module_names:
        yield m
    # These modules only exist on windows, but we should always
    # consider them stdlib.
    for m in ['msvcrt', '_winreg']:
        yield m
    # These get missed too
    for m in 'ctypes', 'email':
        yield m
    yield 'builtins' # python3 only
    # We want to walk everything in sys.path that starts with something
    # in stdlib_prefixes.  str.startswith() accepts a tuple of candidates,
    # so no explicit loop over the prefixes is needed.
    stdlib_prefixes = tuple(set([sys.prefix, sys.exec_prefix]))
    for libpath in sys.path:
        if not libpath.startswith(stdlib_prefixes):
            continue
        if 'site-packages' in libpath:
            continue
        for top, dirs, files in os.walk(libpath):
            for name in files:
                if name == '__init__.py':
                    continue
                if not name.endswith(('.py', '.so')):
                    continue
                full_path = os.path.join(top, name)
                if 'site-packages' in full_path:
                    continue
                # relpath() rather than slicing off len(libpath) + 1
                # characters: the slice loses the first character of the
                # module name when libpath already ends with a separator.
                rel_path = os.path.relpath(full_path, libpath)
                yield dotted_name_of_path(rel_path)
71 74
# Every name produced by list_stdlib_modules(), collected into a set once
# when this module is loaded.
stdlib_modules = set(list_stdlib_modules())
73 76
def imported_modules(source, ignore_nested=False):
    """Given the source of a file as a string, yield the names
    imported by that file.

    Args:
      source: The python source to examine as a string.
      ignore_nested: If true, import statements that do not start in
                     column zero will be ignored.

    Yields:
      The module names imported by the given source, one at a time.
      "import a.b" yields 'a.b'; "from a import b" yields 'a.b';
      "from . import b" (no module part) yields just 'b'.

    >>> sorted(imported_modules(
    ...         'import foo ; from baz import bar; import foo.qux'))
    ['baz.bar', 'foo', 'foo.qux']
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', ignore_nested=True))
    ['foo']
    >>> sorted(imported_modules('from . import foo'))
    ['foo']
    """
    for node in ast.walk(ast.parse(source)):
        if ignore_nested and getattr(node, 'col_offset', 0) > 0:
            continue
        if isinstance(node, ast.Import):
            for n in node.names:
                yield n.name
        elif isinstance(node, ast.ImportFrom):
            # node.module is None for "from . import x"; concatenating it
            # with '.' would raise TypeError, so use an empty prefix then.
            prefix = ''
            if node.module:
                prefix = node.module + '.'
            for n in node.names:
                yield prefix + n.name
106 109
def verify_stdlib_on_own_line(source):
    """Given some python source, verify that stdlib imports are done
    in separate statements from relative local module imports.

    Observing this limitation is important as it works around an
    annoying lib2to3 bug in relative import rewrites:
    http://bugs.python.org/issue19510.

    This is a generator: it yields one message string for every
    "import a, b, ..." statement that names both modules found in
    stdlib_modules and modules that are not.

    >>> list(verify_stdlib_on_own_line('import sys, foo'))
    ['mixed stdlib and relative imports:\\n foo, sys']
    >>> list(verify_stdlib_on_own_line('import sys, os'))
    []
    >>> list(verify_stdlib_on_own_line('import foo, bar'))
    []
    """
    for node in ast.walk(ast.parse(source)):
        if not isinstance(node, ast.Import):
            continue
        # Map each imported name to whether it is a stdlib module.
        from_stdlib = {}
        for n in node.names:
            from_stdlib[n.name] = n.name in stdlib_modules
        num_std = len([x for x in from_stdlib.values() if x])
        # Fine when either every name or no name is from the stdlib.
        if num_std not in (len(from_stdlib), 0):
            yield ('mixed stdlib and relative imports:\n %s' %
                   ', '.join(sorted(from_stdlib)))
131 134
class CircularImport(Exception):
    """Raised by check_one_mod() when it runs into an import cycle.

    The single constructor argument (args[0]) is the cycle: a list of
    module names whose last entry repeats the entry the cycle starts at.
    """
    pass
134 137
135 138
def cyclekey(names):
    """Return an order-independent key for a cycle.

    The key is a tuple of the distinct names in sorted order, so the same
    set of modules always maps to the same key.
    """
    distinct = set(names)
    ordered = sorted(distinct)
    return tuple(ordered)
138 141
def check_one_mod(mod, imports, path=None, ignore=None):
    """Walk the imports of mod depth-first, looking for an import cycle.

    Args:
      mod: dotted name of the module to start from.
      imports: dict mapping a dotted module name to the names it imports.
      path: modules already on the current walk (None means none yet).
      ignore: container of cyclekey() values for cycles that should not
              be reported (None means nothing is ignored).

    Raises:
      CircularImport: carrying the cycle as a list of module names, for
      the first cycle found whose key is not in ignore.
    """
    trail = ([] if path is None else path) + [mod]
    skip = [] if ignore is None else ignore
    # Names that are not stdlib modules are taken to be siblings of mod.
    package = mod.rsplit('.', 1)[0]
    for name in sorted(imports.get(mod, [])):
        if name in stdlib_modules:
            target = name
        else:
            target = package + '.' + name
        if target not in trail:
            check_one_mod(target, imports, path=trail, ignore=skip)
            continue
        # target is already on the walk: everything from its first
        # appearance onwards, closed by target itself, is the cycle.
        start = trail.index(target)
        cycle = trail[start:] + [target]
        if cyclekey(cycle) not in skip:
            raise CircularImport(cycle)
155 158
def rotatecycle(cycle):
    """arrange a cycle so that the lexicographically first module listed first

    The cycle is normally given in closed form (its last entry repeats
    its first), which is how check_one_mod() reports it.  The result is
    always closed: it starts and ends with the lowest name.

    >>> rotatecycle(['foo', 'bar', 'foo'])
    ['bar', 'foo', 'bar']
    >>> rotatecycle(['bar', 'foo', 'bar'])
    ['bar', 'foo', 'bar']
    """
    # Drop the closing duplicate before rotating.  Rotating the closed
    # list directly repeats the last name ("a -> b -> a -> a") whenever
    # the lowest name is already in front.
    if len(cycle) > 1 and cycle[0] == cycle[-1]:
        body = cycle[:-1]
    else:
        body = cycle
    lowest = min(body)
    idx = body.index(lowest)
    return body[idx:] + body[:idx] + [lowest]

def find_cycles(imports):
    """Find cycles in an already-loaded import graph.

    Args:
      imports: dict mapping a dotted module name to the names it imports.

    Returns:
      A list of strings of the form 'a -> b -> a', one per distinct set
      of modules found to form a cycle.  Each starting module contributes
      at most one new cycle, because check_one_mod() stops at the first
      cycle it has not been told to ignore.

    >>> imports = {'top.foo': ['bar', 'os.path', 'qux'],
    ...            'top.bar': ['baz', 'sys'],
    ...            'top.baz': ['foo'],
    ...            'top.qux': ['foo']}
    >>> print('\\n'.join(sorted(find_cycles(imports))))
    top.bar -> top.baz -> top.foo -> top.bar
    top.foo -> top.qux -> top.foo
    """
    cycles = {}
    for mod in sorted(imports):
        try:
            # Cycles already recorded are passed as "ignore" so the walk
            # continues past them instead of reporting them again.
            check_one_mod(mod, imports, ignore=cycles)
        except CircularImport as e:
            cycle = e.args[0]
            cycles[cyclekey(cycle)] = ' -> '.join(rotatecycle(cycle))
    return list(cycles.values())
185 188
186 189 def _cycle_sortkey(c):
187 190 return len(c), c
188 191
def main(argv):
    """Check the given source files for import problems.

    Every file named in argv[1:] is read and checked for import
    statements that mix stdlib and local modules.  The imports of all
    files are then searched for import cycles.  Each problem is printed
    to stdout.

    Args:
      argv: the command line; argv[0] is the program name and the
            remaining entries are paths of source files.

    Returns:
      1 if no file was given or if any problem was reported, 0 otherwise,
      so the value can be used directly as a process exit status.
    """
    if len(argv) < 2:
        prog = argv[0] if argv else 'import-checker'
        print('Usage: %s file [file] [file] ...' % prog)
        return 1
    used_imports = {}
    any_errors = False
    for source_path in argv[1:]:
        # Read the file up front so the handle is closed even when the
        # analysis below raises (for example on a syntax error).
        with open(source_path) as f:
            src = f.read()
        modname = dotted_name_of_path(source_path)
        used_imports[modname] = sorted(
            imported_modules(src, ignore_nested=True))
        for error in verify_stdlib_on_own_line(src):
            any_errors = True
            print('%s %s' % (source_path, error))
    cycles = find_cycles(used_imports)
    if cycles:
        firstmods = set()
        for c in sorted(cycles, key=_cycle_sortkey):
            first = c.split()[0]
            # As a rough cut, ignore any cycle that starts with the
            # same module as some other cycle. Otherwise we see lots
            # of cycles that are effectively duplicates.
            if first in firstmods:
                continue
            print('Import cycle: %s' % c)
            firstmods.add(first)
        any_errors = True
    # Non-zero means failure, matching the usage-error return above.
    return int(any_errors)
219 222
if __name__ == '__main__':
    # Run the checker on the command-line arguments and use main()'s
    # result, coerced to int, as the process exit status.
    sys.exit(int(main(sys.argv)))
General Comments 0
You need to be logged in to leave comments. Login now