##// END OF EJS Templates
import-checker: show stdlib and relative imports separately...
Mads Kiilerich -
r20386:a05d31b0 default
parent child Browse files
Show More
@@ -1,247 +1,247 b''
1 1 import ast
2 2 import os
3 3 import sys
4 4
5 5 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
6 6 # to work when run from a virtualenv. The modules were chosen empirically
7 7 # so that the return value matches the return value without virtualenv.
8 8 import BaseHTTPServer
9 9 import zlib
10 10
def dotted_name_of_path(path):
    """Given a relative path to a source file, return its dotted module name.

    The extension of the last path component is dropped whatever its
    length, and a trailing 'module' on that component is removed as well.

    >>> dotted_name_of_path('mercurial/error.py')
    'mercurial.error'
    >>> dotted_name_of_path('zlibmodule.so')
    'zlib'
    >>> dotted_name_of_path('mercurial/parsers.pyd')
    'mercurial.parsers'
    """
    parts = path.split('/')
    # splitext() removes the real extension; slicing off a fixed three
    # characters is only right for suffixes like '.py' and '.so'.
    parts[-1] = os.path.splitext(parts[-1])[0]
    if parts[-1].endswith('module'):
        parts[-1] = parts[-1][:-6]
    return '.'.join(parts)
24 24
25 25
def list_stdlib_modules():
    """Yield the names of the modules present in the stdlib.

    No de-duplication is performed here; collect the result into a set
    if unique names are needed.

    >>> mods = set(list_stdlib_modules())
    >>> 'BaseHTTPServer' in mods
    True

    os.path isn't really a module, so it's missing:

    >>> 'os.path' in mods
    False

    sys requires special treatment, because it's baked into the
    interpreter, but it should still appear:

    >>> 'sys' in mods
    True

    >>> 'collections' in mods
    True

    >>> 'cStringIO' in mods
    True
    """
    for builtin_name in sys.builtin_module_names:
        yield builtin_name
    # Fixed extras, in order: two modules that only exist on windows but
    # should always count as stdlib, two that get missed otherwise, and
    # 'builtins', which is python3 only.
    for extra in ('msvcrt', '_winreg', 'ctypes', 'email', 'builtins'):
        yield extra
    search_roots = set([sys.prefix, sys.exec_prefix])
    # Supplement the roots with the directories of a few known stdlib
    # modules so the search also works when run from within a virtualenv.
    for anchor in (BaseHTTPServer, zlib):
        try:
            # Not all module objects have a __file__ attribute.
            anchor_file = anchor.__file__
        except AttributeError:
            continue
        anchor_dir = os.path.dirname(anchor_file)
        redundant = False
        for root in search_roots:
            if anchor_dir.startswith(root):
                # Already covered by an existing root.
                redundant = True
                break
        if not redundant:
            search_roots.add(anchor_dir)
    for libpath in sys.path:
        # Only walk sys.path entries that start with one of the roots.
        # check-code suppressed because the ast module used by this
        # script implies the availability of any().
        if not any(libpath.startswith(p) for p in search_roots): # no-py24
            continue
        if 'site-packages' in libpath:
            continue
        strip = len(libpath) + 1
        for top, dirs, files in os.walk(libpath):
            for fname in files:
                if fname == '__init__.py':
                    continue
                if not fname.endswith(('.py', '.so')):
                    continue
                full_path = os.path.join(top, fname)
                if 'site-packages' in full_path:
                    continue
                yield dotted_name_of_path(full_path[strip:])
97 97
# Every name reported by list_stdlib_modules(), gathered once at import
# time so that later membership tests are plain set lookups.
stdlib_modules = set()
stdlib_modules.update(list_stdlib_modules())
99 99
def imported_modules(source, ignore_nested=False):
    """Given the source of a file as a string, yield the names
    imported by that file.

    Args:
      source: The python source to examine as a string.
      ignore_nested: If true, import statements that do not start in
                     column zero will be ignored.

    Yields:
      One dotted name per imported name. "import X" yields 'X',
      "from X import Y" yields 'X.Y', and a from-import with no module
      part ("from . import Y") yields just 'Y'.

    >>> sorted(imported_modules(
    ...         'import foo ; from baz import bar; import foo.qux'))
    ['baz.bar', 'foo', 'foo.qux']
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', ignore_nested=True))
    ['foo']
    >>> sorted(imported_modules('from . import foo'))
    ['foo']
    """
    for node in ast.walk(ast.parse(source)):
        if ignore_nested and getattr(node, 'col_offset', 0) > 0:
            continue
        if isinstance(node, ast.Import):
            for n in node.names:
                yield n.name
        elif isinstance(node, ast.ImportFrom):
            # node.module is None for "from . import x"; concatenating it
            # with '.' would raise TypeError, so use an empty prefix.
            if node.module:
                prefix = node.module + '.'
            else:
                prefix = ''
            for n in node.names:
                yield prefix + n.name
132 132
def verify_stdlib_on_own_line(source):
    """Given some python source, verify that stdlib imports are done
    in separate statements from relative local module imports.

    Observing this limitation is important as it works around an
    annoying lib2to3 bug in relative import rewrites:
    http://bugs.python.org/issue19510.

    One message is yielded per offending "import" statement, listing the
    stdlib names and the remaining names separately.

    >>> list(verify_stdlib_on_own_line('import sys, foo'))
    ['mixed imports\\n stdlib: sys\\n relative: foo']
    >>> list(verify_stdlib_on_own_line('import sys, os'))
    []
    >>> list(verify_stdlib_on_own_line('import foo, bar'))
    []
    """
    for node in ast.walk(ast.parse(source)):
        if not isinstance(node, ast.Import):
            continue
        names = [alias.name for alias in node.names]
        stdlib = sorted(n for n in names if n in stdlib_modules)
        relative = sorted(n for n in names if n not in stdlib_modules)
        # Only a statement that mixes both kinds is a problem.
        if stdlib and relative:
            yield ('mixed imports\n stdlib: %s\n relative: %s' %
                   (', '.join(stdlib), ', '.join(relative)))
157 157
class CircularImport(Exception):
    """Raised when an import cycle is found; args[0] is the cycle list."""
160 160
161 161
def cyclekey(names):
    """Return a hashable key for a cycle: its distinct names, sorted.

    The key does not depend on where the cycle starts or on repeated
    entries, so every rotation of one cycle maps to the same tuple.
    """
    distinct = set(names)
    ordered = sorted(distinct)
    return tuple(ordered)
164 164
def check_one_mod(mod, imports, path=None, ignore=None):
    """Follow the imports of mod depth-first, raising on a new cycle.

    Args:
      mod: dotted name of the module to start from.
      imports: mapping of dotted module name to the names it imports.
      path: modules already on the current walk (callers normally omit it).
      ignore: container of cyclekey() values for cycles to skip.

    Raises:
      CircularImport: carrying the cycle as a list, for the first cycle
        met whose key is not in ignore.
    """
    skip = [] if ignore is None else ignore
    trail = ([] if path is None else path) + [mod]
    for name in sorted(imports.get(mod, [])):
        if name in stdlib_modules:
            target = name
        else:
            # Anything that is not stdlib is taken to be a sibling of mod.
            target = mod.rsplit('.', 1)[0] + '.' + name
        if target not in trail:
            check_one_mod(target, imports, path=trail, ignore=skip)
            continue
        cycle = trail[trail.index(target):] + [target]
        if cyclekey(cycle) not in skip:
            raise CircularImport(cycle)
181 181
def rotatecycle(cycle):
    """Arrange a cycle so that the lexicographically first module comes first.

    cycle is a closed walk: its last element repeats its first. The
    result is closed the same way and has the same length.

    >>> rotatecycle(['foo', 'bar', 'foo'])
    ['bar', 'foo', 'bar']
    >>> rotatecycle(['bar', 'baz', 'foo', 'bar'])
    ['bar', 'baz', 'foo', 'bar']
    """
    lowest = min(cycle)
    idx = cycle.index(lowest)
    # Leave out the closing repeat (cycle[-1]) before rotating; keeping it
    # duplicated the last entry whenever the lowest name was already first.
    return cycle[idx:-1] + cycle[:idx] + [lowest]

def find_cycles(imports):
    """Find cycles in an already-loaded import graph.

    Args:
      imports: mapping of dotted module name to the names it imports.

    Returns:
      A list of strings, one per distinct cycle, each of the form
      'a -> b -> a' and rotated by rotatecycle().

    >>> imports = {'top.foo': ['bar', 'os.path', 'qux'],
    ...            'top.bar': ['baz', 'sys'],
    ...            'top.baz': ['foo'],
    ...            'top.qux': ['foo']}
    >>> print('\\n'.join(sorted(find_cycles(imports))))
    top.bar -> top.baz -> top.foo -> top.bar
    top.foo -> top.qux -> top.foo
    """
    cycles = {}
    for mod in sorted(imports):
        try:
            # Cycles already recorded are passed as 'ignore' so the walk
            # only raises for a cycle that has not been seen yet.
            check_one_mod(mod, imports, ignore=cycles)
        except CircularImport as e:
            cycle = e.args[0]
            cycles[cyclekey(cycle)] = ' -> '.join(rotatecycle(cycle))
    return list(cycles.values())
211 211
212 212 def _cycle_sortkey(c):
213 213 return len(c), c
214 214
def main(argv):
    """Check the source files named in argv[1:] and print any problems.

    For each file this prints the messages from
    verify_stdlib_on_own_line(), then prints the import cycles found
    across all files, at most one per leading module.

    Args:
      argv: the program name followed by one or more source file paths.

    Returns:
      1 when no files were given; otherwise True if nothing was
      reported and False if anything was.
    """
    if len(argv) < 2:
        if argv:
            prog = argv[0]
        else:
            prog = 'import-checker.py'
        print('Usage: %s file [file] [file] ...' % prog)
        return 1
    used_imports = {}
    any_errors = False
    for source_path in argv[1:]:
        # Read inside 'with' so the file is closed even if reading fails.
        with open(source_path) as f:
            src = f.read()
        modname = dotted_name_of_path(source_path)
        used_imports[modname] = sorted(
            imported_modules(src, ignore_nested=True))
        for error in verify_stdlib_on_own_line(src):
            any_errors = True
            print('%s %s' % (source_path, error))
    cycles = find_cycles(used_imports)
    if cycles:
        firstmods = set()
        for c in sorted(cycles, key=_cycle_sortkey):
            first = c.split()[0]
            # As a rough cut, ignore any cycle that starts with the
            # same module as some other cycle. Otherwise we see lots
            # of cycles that are effectively duplicates.
            if first in firstmods:
                continue
            print('Import cycle: %s' % c)
            firstmods.add(first)
        any_errors = True
    # NOTE(review): the __main__ guard passes int() of this value to
    # sys.exit(), so a clean run exits 1 and a run with findings exits 0.
    # That looks inverted; confirm against the test expectations before
    # changing it.
    return not any_errors
245 245
if __name__ == '__main__':
    # main() may return a bool; convert it to an integer exit status.
    exit_status = int(main(sys.argv))
    sys.exit(exit_status)
@@ -1,39 +1,44 b''
1 1 This code uses the ast module, which was new in 2.6, so we'll skip
2 2 this test on anything earlier.
3 3 $ python -c 'import sys ; assert sys.version_info >= (2, 6)' || exit 80
4 4
5 5 $ import_checker="$TESTDIR"/../contrib/import-checker.py
6 6 Run the doctests from the import checker, and make sure
7 7 it's working correctly.
8 8 $ TERM=dumb
9 9 $ export TERM
10 10 $ python -m doctest $import_checker
11 11
12 12 $ cd "$TESTDIR"/..
13 13 $ if hg identify -q > /dev/null 2>&1; then :
14 14 > else
15 15 > echo "skipped: not a Mercurial working dir" >&2
16 16 > exit 80
17 17 > fi
18 18
19 19 There are a handful of cases here that require renaming a module so it
20 20 doesn't overlap with a stdlib module name. There are also some cycles
21 21 here that we should still endeavor to fix, and some cycles will be
22 22 hidden by the deduplication algorithm in the cycle detector, so fixing
23 23 these may expose other cycles.
24 24
25 25 $ hg locate 'mercurial/**.py' | xargs python "$import_checker"
26 mercurial/dispatch.py mixed stdlib and relative imports:
27 commands, error, extensions, fancyopts, hg, hook, util
28 mercurial/fileset.py mixed stdlib and relative imports:
29 error, merge, parser, util
30 mercurial/revset.py mixed stdlib and relative imports:
31 discovery, error, hbisect, parser, phases, util
32 mercurial/templater.py mixed stdlib and relative imports:
33 config, error, parser, templatefilters, util
34 mercurial/ui.py mixed stdlib and relative imports:
35 config, error, formatter, scmutil, util
26 mercurial/dispatch.py mixed imports
27 stdlib: commands
28 relative: error, extensions, fancyopts, hg, hook, util
29 mercurial/fileset.py mixed imports
30 stdlib: parser
31 relative: error, merge, util
32 mercurial/revset.py mixed imports
33 stdlib: parser
34 relative: discovery, error, hbisect, phases, util
35 mercurial/templater.py mixed imports
36 stdlib: parser
37 relative: config, error, templatefilters, util
38 mercurial/ui.py mixed imports
39 stdlib: formatter
40 relative: config, error, scmutil, util
36 41 Import cycle: mercurial.cmdutil -> mercurial.subrepo -> mercurial.cmdutil
37 42 Import cycle: mercurial.repoview -> mercurial.revset -> mercurial.repoview
38 43 Import cycle: mercurial.fileset -> mercurial.merge -> mercurial.subrepo -> mercurial.match -> mercurial.fileset
39 44 Import cycle: mercurial.filemerge -> mercurial.match -> mercurial.fileset -> mercurial.merge -> mercurial.filemerge
General Comments 0
You need to be logged in to leave comments. Login now