##// END OF EJS Templates
import-checker: show stdlib and relative imports separately...
Mads Kiilerich -
r20386:a05d31b0 default
parent child Browse files
Show More
@@ -1,247 +1,247
1 import ast
1 import ast
2 import os
2 import os
3 import sys
3 import sys
4
4
5 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
5 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
6 # to work when run from a virtualenv. The modules were chosen empirically
6 # to work when run from a virtualenv. The modules were chosen empirically
7 # so that the return value matches the return value without virtualenv.
7 # so that the return value matches the return value without virtualenv.
8 import BaseHTTPServer
8 import BaseHTTPServer
9 import zlib
9 import zlib
10
10
def dotted_name_of_path(path):
    """Given a relative path to a source file, return its dotted module name.

    The file extension is dropped whatever its length, and a trailing
    ``module`` on the last path component (as in ``zlibmodule.so``) is
    removed as well.

    Args:
      path: A '/'-separated path relative to the import root.

    Returns:
      The dotted module name as a string.

    >>> dotted_name_of_path('mercurial/error.py')
    'mercurial.error'
    >>> dotted_name_of_path('zlibmodule.so')
    'zlib'
    >>> dotted_name_of_path('mercurial/parsers.pyd')
    'mercurial.parsers'
    """
    parts = path.split('/')
    # splitext() rather than a fixed [:-3] slice: the slice silently
    # mangled names whose extension is not exactly three characters long.
    parts[-1] = os.path.splitext(parts[-1])[0]
    if parts[-1].endswith('module'):
        parts[-1] = parts[-1][:-6]
    return '.'.join(parts)
24
24
25
25
def list_stdlib_modules():
    """Yield the dotted names of the modules found in the stdlib.

    The names come from three places: the interpreter's builtin module
    table, a short hard-coded list, and a walk over every sys.path entry
    that lives under one of the stdlib prefixes.

    >>> mods = set(list_stdlib_modules())
    >>> 'BaseHTTPServer' in mods
    True

    os.path isn't really a module, so it's missing:

    >>> 'os.path' in mods
    False

    sys requires special treatment, because it's baked into the
    interpreter, but it should still appear:

    >>> 'sys' in mods
    True

    >>> 'collections' in mods
    True

    >>> 'cStringIO' in mods
    True
    """
    for builtin in sys.builtin_module_names:
        yield builtin
    # These modules only exist on windows, but we should always
    # consider them stdlib.
    yield 'msvcrt'
    yield '_winreg'
    # These get missed by the directory walk below.
    yield 'ctypes'
    yield 'email'
    yield 'builtins' # python3 only
    roots = set([sys.prefix, sys.exec_prefix])
    # Inside a virtualenv the two prefixes above are not enough, so add
    # the directories of a couple of known stdlib modules unless an
    # existing prefix already covers them.
    for probe in (BaseHTTPServer, zlib):
        try:
            # Not all module objects have a __file__ attribute.
            probe_file = probe.__file__
        except AttributeError:
            continue
        probe_dir = os.path.dirname(probe_file)
        covered = any(probe_dir.startswith(r) for r in roots) # no-py24
        if not covered:
            roots.add(probe_dir)
    for libpath in sys.path:
        # Only walk sys.path entries that start with one of the stdlib
        # prefixes. check-code suppressed because the ast module used by
        # this script implies the availability of any().
        in_stdlib = any(libpath.startswith(r) for r in roots) # no-py24
        if not in_stdlib or 'site-packages' in libpath:
            continue
        for top, dirs, files in os.walk(libpath):
            for name in files:
                if name == '__init__.py':
                    continue
                if not name.endswith(('.py', '.so')):
                    continue
                full_path = os.path.join(top, name)
                if 'site-packages' in full_path:
                    continue
                # Strip the sys.path entry and its trailing separator.
                yield dotted_name_of_path(full_path[len(libpath) + 1:])
97
97
# Computed once at import time; the checks in this file test membership in
# this set to tell stdlib imports apart from relative ones.
stdlib_modules = set(list_stdlib_modules())
99
99
def imported_modules(source, ignore_nested=False):
    """Given the source of a file as a string, yield the names
    imported by that file.

    Args:
      source: The python source to examine as a string.
      ignore_nested: If true, import statements that do not start in
                     column zero will be ignored.

    Yields:
      One dotted name per imported name: ``import a.b`` yields 'a.b',
      ``from a import b`` yields 'a.b', and ``from . import b`` (which
      has no module part) yields 'b'.

    >>> sorted(imported_modules(
    ...     'import foo ; from baz import bar; import foo.qux'))
    ['baz.bar', 'foo', 'foo.qux']
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', ignore_nested=True))
    ['foo']
    >>> sorted(imported_modules('from . import bar'))
    ['bar']
    """
    for node in ast.walk(ast.parse(source)):
        # Nodes without a col_offset (e.g. the Module node) count as
        # column zero.
        if ignore_nested and getattr(node, 'col_offset', 0) > 0:
            continue
        if isinstance(node, ast.Import):
            for n in node.names:
                yield n.name
        elif isinstance(node, ast.ImportFrom):
            # node.module is None for "from . import x"; concatenating it
            # with '.' used to raise TypeError.
            if node.module:
                prefix = node.module + '.'
            else:
                prefix = ''
            for n in node.names:
                yield prefix + n.name
132
132
def verify_stdlib_on_own_line(source):
    """Given some python source, verify that stdlib imports are done
    in separate statements from relative local module imports.

    Observing this limitation is important as it works around an
    annoying lib2to3 bug in relative import rewrites:
    http://bugs.python.org/issue19510.

    One message is yielded for every ``import`` statement that names
    both stdlib and non-stdlib modules; each group is listed sorted.

    >>> list(verify_stdlib_on_own_line('import sys, foo'))
    ['mixed imports\\n stdlib: sys\\n relative: foo']
    >>> list(verify_stdlib_on_own_line('import sys, os'))
    []
    >>> list(verify_stdlib_on_own_line('import foo, bar'))
    []
    """
    for node in ast.walk(ast.parse(source)):
        if not isinstance(node, ast.Import):
            continue
        names = [alias.name for alias in node.names]
        stdlib = sorted(nm for nm in names if nm in stdlib_modules)
        relative = sorted(nm for nm in names if nm not in stdlib_modules)
        # Only a statement mixing both kinds is a problem.
        if stdlib and relative:
            yield ('mixed imports\n stdlib: %s\n relative: %s' %
                   (', '.join(stdlib), ', '.join(relative)))
157
157
class CircularImport(Exception):
    """Raised by check_one_mod() when it meets an import cycle.

    The cycle, a list of module names, is passed as the only argument.
    """
160
160
161
161
def cyclekey(names):
    """Return a hashable key identifying the set of modules in a cycle.

    Duplicates are dropped and the names are sorted, so the key does not
    depend on where the cycle starts.
    """
    unique = set(names)
    return tuple(sorted(unique))
164
164
def check_one_mod(mod, imports, path=None, ignore=None):
    """Recursively follow the imports of mod, looking for a cycle.

    Args:
      mod: Dotted name of the module to start from.
      imports: Dict mapping a dotted module name to the names it imports.
      path: Modules already visited on the way to mod (None means none).
      ignore: Container of cyclekey() values for cycles to skip over.

    Raises:
      CircularImport: with the cycle as a list of module names, for the
        first cycle met whose key is not in ignore.
    """
    visited = ([] if path is None else path) + [mod]
    known = [] if ignore is None else ignore
    # Names that are not stdlib are resolved relative to mod's package.
    package = mod.rsplit('.', 1)[0]
    for name in sorted(imports.get(mod, [])):
        if name in stdlib_modules:
            target = name
        else:
            target = package + '.' + name
        if target not in visited:
            check_one_mod(target, imports, path=visited, ignore=known)
            continue
        # target is already on the current path: that closes a cycle.
        cycle = visited[visited.index(target):] + [target]
        if cyclekey(cycle) not in known:
            raise CircularImport(cycle)
181
181
def rotatecycle(cycle):
    """arrange a cycle so that the lexicographically first module listed first

    Args:
      cycle: A list of module names describing a closed cycle, i.e. its
        last entry repeats its first one.

    Returns:
      A new list describing the same cycle, starting and ending with the
      lexicographically smallest module.

    >>> rotatecycle(['foo', 'bar', 'foo'])
    ['bar', 'foo', 'bar']
    >>> rotatecycle(['bar', 'foo', 'bar'])
    ['bar', 'foo', 'bar']
    """
    # Drop the repeated closing entry before rotating. Rotating the closed
    # list directly appended the lowest module twice whenever it was
    # already in first position ('a -> b -> a -> a').
    body = cycle[:-1]
    lowest = min(body)
    idx = body.index(lowest)
    return body[idx:] + body[:idx] + [lowest]

def find_cycles(imports):
    """Find cycles in an already-loaded import graph.

    Args:
      imports: Dict mapping a dotted module name to the names it imports.

    Returns:
      A list of strings of the form 'a -> b -> a', one per distinct
      cycle. At most one new cycle is recorded per starting module,
      because check_one_mod() raises on the first unrecorded cycle it
      meets.

    >>> imports = {'top.foo': ['bar', 'os.path', 'qux'],
    ...            'top.bar': ['baz', 'sys'],
    ...            'top.baz': ['foo'],
    ...            'top.qux': ['foo']}
    >>> print('\\n'.join(sorted(find_cycles(imports))))
    top.bar -> top.baz -> top.foo -> top.bar
    top.foo -> top.qux -> top.foo
    """
    cycles = {}
    for mod in sorted(imports):
        try:
            # Cycles already recorded are passed as "ignore" so that the
            # search can move past them.
            check_one_mod(mod, imports, ignore=cycles)
        except CircularImport as e:
            cycle = e.args[0]
            cycles[cyclekey(cycle)] = ' -> '.join(rotatecycle(cycle))
    return list(cycles.values())
211
211
def _cycle_sortkey(c):
    """Sort key for a cycle string: its length first, then the string."""
    return (len(c), c)
214
214
def main(argv):
    """Check the files named on the command line and report problems.

    For every file this prints each message yielded by
    verify_stdlib_on_own_line(), then prints the import cycles found
    across all files (only the first cycle starting with a given module).

    Args:
      argv: The command line; argv[0] is the program name and the
        remaining entries are paths of python source files.

    Returns:
      1 if no file was given; otherwise ``not any_errors``, i.e. True
      when nothing was reported and False when something was.

      NOTE(review): the entry point passes this through int() to
      sys.exit(), so a clean run exits 1 and a run with findings exits 0.
      That looks inverted, but callers may rely on it; confirm before
      changing.
    """
    if len(argv) < 2:
        # The program name used to be left out, printing a literal '%s'.
        prog = argv[0] if argv else 'import-checker.py'
        print('Usage: %s file [file] [file] ...' % prog)
        return 1
    used_imports = {}
    any_errors = False
    for source_path in argv[1:]:
        modname = dotted_name_of_path(source_path)
        # "with" closes the file even if read() raises.
        with open(source_path) as f:
            src = f.read()
        used_imports[modname] = sorted(
            imported_modules(src, ignore_nested=True))
        for error in verify_stdlib_on_own_line(src):
            any_errors = True
            print('%s %s' % (source_path, error))
    cycles = find_cycles(used_imports)
    if cycles:
        firstmods = set()
        for c in sorted(cycles, key=_cycle_sortkey):
            first = c.split()[0]
            # As a rough cut, ignore any cycle that starts with the
            # same module as some other cycle. Otherwise we see lots
            # of cycles that are effectively duplicates.
            if first in firstmods:
                continue
            print('Import cycle: %s' % c)
            firstmods.add(first)
        any_errors = True
    return not any_errors
245
245
if __name__ == '__main__':
    # main() returns 1 for a usage error and a bool otherwise; int() turns
    # either into the process exit status.
    sys.exit(int(main(sys.argv)))
@@ -1,39 +1,44
1 This code uses the ast module, which was new in 2.6, so we'll skip
1 This code uses the ast module, which was new in 2.6, so we'll skip
2 this test on anything earlier.
2 this test on anything earlier.
3 $ python -c 'import sys ; assert sys.version_info >= (2, 6)' || exit 80
3 $ python -c 'import sys ; assert sys.version_info >= (2, 6)' || exit 80
4
4
5 $ import_checker="$TESTDIR"/../contrib/import-checker.py
5 $ import_checker="$TESTDIR"/../contrib/import-checker.py
6 Run the doctests from the import checker, and make sure
6 Run the doctests from the import checker, and make sure
7 it's working correctly.
7 it's working correctly.
8 $ TERM=dumb
8 $ TERM=dumb
9 $ export TERM
9 $ export TERM
10 $ python -m doctest $import_checker
10 $ python -m doctest $import_checker
11
11
12 $ cd "$TESTDIR"/..
12 $ cd "$TESTDIR"/..
13 $ if hg identify -q > /dev/null 2>&1; then :
13 $ if hg identify -q > /dev/null 2>&1; then :
14 > else
14 > else
15 > echo "skipped: not a Mercurial working dir" >&2
15 > echo "skipped: not a Mercurial working dir" >&2
16 > exit 80
16 > exit 80
17 > fi
17 > fi
18
18
19 There are a handful of cases here that require renaming a module so it
19 There are a handful of cases here that require renaming a module so it
20 doesn't overlap with a stdlib module name. There are also some cycles
20 doesn't overlap with a stdlib module name. There are also some cycles
21 here that we should still endeavor to fix, and some cycles will be
21 here that we should still endeavor to fix, and some cycles will be
22 hidden by the deduplication algorithm in the cycle detector, so fixing
22 hidden by the deduplication algorithm in the cycle detector, so fixing
23 these may expose other cycles.
23 these may expose other cycles.
24
24
25 $ hg locate 'mercurial/**.py' | xargs python "$import_checker"
25 $ hg locate 'mercurial/**.py' | xargs python "$import_checker"
26 mercurial/dispatch.py mixed stdlib and relative imports:
26 mercurial/dispatch.py mixed imports
27 commands, error, extensions, fancyopts, hg, hook, util
27 stdlib: commands
28 mercurial/fileset.py mixed stdlib and relative imports:
28 relative: error, extensions, fancyopts, hg, hook, util
29 error, merge, parser, util
29 mercurial/fileset.py mixed imports
30 mercurial/revset.py mixed stdlib and relative imports:
30 stdlib: parser
31 discovery, error, hbisect, parser, phases, util
31 relative: error, merge, util
32 mercurial/templater.py mixed stdlib and relative imports:
32 mercurial/revset.py mixed imports
33 config, error, parser, templatefilters, util
33 stdlib: parser
34 mercurial/ui.py mixed stdlib and relative imports:
34 relative: discovery, error, hbisect, phases, util
35 config, error, formatter, scmutil, util
35 mercurial/templater.py mixed imports
36 stdlib: parser
37 relative: config, error, templatefilters, util
38 mercurial/ui.py mixed imports
39 stdlib: formatter
40 relative: config, error, scmutil, util
36 Import cycle: mercurial.cmdutil -> mercurial.subrepo -> mercurial.cmdutil
41 Import cycle: mercurial.cmdutil -> mercurial.subrepo -> mercurial.cmdutil
37 Import cycle: mercurial.repoview -> mercurial.revset -> mercurial.repoview
42 Import cycle: mercurial.repoview -> mercurial.revset -> mercurial.repoview
38 Import cycle: mercurial.fileset -> mercurial.merge -> mercurial.subrepo -> mercurial.match -> mercurial.fileset
43 Import cycle: mercurial.fileset -> mercurial.merge -> mercurial.subrepo -> mercurial.match -> mercurial.fileset
39 Import cycle: mercurial.filemerge -> mercurial.match -> mercurial.fileset -> mercurial.merge -> mercurial.filemerge
44 Import cycle: mercurial.filemerge -> mercurial.match -> mercurial.fileset -> mercurial.merge -> mercurial.filemerge
General Comments 0
You need to be logged in to leave comments. Login now