##// END OF EJS Templates
import-checker: establish new function for verifying import conventions...
Gregory Szorc -
r25702:ab2c5163 default
parent child Browse files
Show More
@@ -1,390 +1,396 b''
1 1 import ast
2 2 import os
3 3 import sys
4 4
5 5 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
6 6 # to work when run from a virtualenv. The modules were chosen empirically
7 7 # so that the return value matches the return value without virtualenv.
8 8 import BaseHTTPServer
9 9 import zlib
10 10
def dotted_name_of_path(path, trimpure=False):
    """Given a relative path to a source file, return its dotted module name.

    `path` may use either '/' or the platform's own directory separator
    (os.sep); both are treated as component boundaries.

    Everything after the first '.' of the file name is dropped, and a
    trailing 'module' on the file name (as in 'zlibmodule.so') is removed.

    If `trimpure` is true, every path component equal to 'pure' is
    omitted from the result.

    >>> dotted_name_of_path('mercurial/error.py')
    'mercurial.error'
    >>> dotted_name_of_path('mercurial/pure/parsers.py', trimpure=True)
    'mercurial.parsers'
    >>> dotted_name_of_path('zlibmodule.so')
    'zlib'
    """
    # Paths produced by os.walk()/os.path.join() use os.sep, which is not
    # '/' on every platform; normalize before splitting so that such paths
    # do not end up as a single bogus component.
    parts = path.replace(os.sep, '/').split('/')
    parts[-1] = parts[-1].split('.', 1)[0]  # remove .py and .so and .ARCH.so
    if parts[-1].endswith('module'):
        parts[-1] = parts[-1][:-6]
    if trimpure:
        return '.'.join(p for p in parts if p != 'pure')
    return '.'.join(parts)
28 28
def fromlocalfunc(modulename, localmods):
    """Get a function to examine which locally defined module the
    target source imports via a specified name.

    `modulename` is a `dotted_name_of_path()`-ed source file path,
    which may have `.__init__` at the end of it, of the target source.

    `localmods` is a dict (or set), of which key is an absolute
    `dotted_name_of_path()`-ed source file path of locally defined (=
    Mercurial specific) modules.

    This function assumes that module names not existing in
    `localmods` are ones of Python standard library.

    This function returns the function, which takes `name` and
    optional `level` arguments, and returns `(absname, dottedpath,
    hassubmod)` tuple if `name` matches against locally defined
    module. Otherwise, it returns False.

    It is assumed that `name` doesn't have `.__init__`.

    `level` is the number of leading dots of an explicit relative
    import (0 for `import x` / `from x import y`). When it is
    positive, `name` is resolved only against the package found by
    stripping `level` trailing components from `modulename`; there is
    no fallback to an absolute module of the same name. `name` is None
    for `from . import y` style imports, in which case `level` must be
    positive.

    `absname` is an absolute module name of specified `name`
    (e.g. "hgext.convert"). This can be used to compose prefix for sub
    modules or so.

    `dottedpath` is a `dotted_name_of_path()`-ed source file path
    (e.g. "hgext.convert.__init__") of `name`. This is used to look
    module up in `localmods` again.

    `hassubmod` is whether it may have sub modules under it (for
    convenience, even though this is also equivalent to "absname !=
    dottedpath")

    >>> localmods = {'foo.__init__': True, 'foo.foo1': True,
    ...              'foo.bar.__init__': True, 'foo.bar.bar1': True,
    ...              'baz.__init__': True, 'baz.baz1': True }
    >>> fromlocal = fromlocalfunc('foo.xxx', localmods)
    >>> # relative
    >>> fromlocal('foo1')
    ('foo.foo1', 'foo.foo1', False)
    >>> fromlocal('bar')
    ('foo.bar', 'foo.bar.__init__', True)
    >>> fromlocal('bar.bar1')
    ('foo.bar.bar1', 'foo.bar.bar1', False)
    >>> # absolute
    >>> fromlocal('baz')
    ('baz', 'baz.__init__', True)
    >>> fromlocal('baz.baz1')
    ('baz.baz1', 'baz.baz1', False)
    >>> # unknown = maybe standard library
    >>> fromlocal('os')
    False
    >>> fromlocal(None, 1)
    ('foo', 'foo.__init__', True)
    >>> fromlocal2 = fromlocalfunc('foo.xxx.yyy', localmods)
    >>> fromlocal2(None, 2)
    ('foo', 'foo.__init__', True)
    >>> # explicit relative import with a module name
    >>> fromlocal('bar', 1)
    ('foo.bar', 'foo.bar.__init__', True)
    >>> fromlocal('baz', 1)
    False
    >>> fromlocal2('foo1', 2)
    ('foo.foo1', 'foo.foo1', False)
    """
    components = modulename.split('.')
    prefix = '.'.join(components[:-1])
    if prefix:
        prefix += '.'

    def fromlocal(name, level=0):
        # name is None when relative imports are used.
        if name is None:
            # If relative imports are used, level must not be absolute.
            assert level > 0
            candidates = ['.'.join(components[:-level])]
        elif level > 0:
            # "from ..pkg import x": anchor on the package `level` steps
            # above this module and do not fall back to an absolute name.
            candidates = ['.'.join(components[:-level]) + '.' + name]
        else:
            # Check relative name first.
            candidates = [prefix + name, name]

        for n in candidates:
            if n in localmods:
                return (n, n, False)
            dottedpath = n + '.__init__'
            if dottedpath in localmods:
                return (n, dottedpath, True)
        return False
    return fromlocal
108 108
def list_stdlib_modules():
    """Generate the names of modules considered part of the stdlib.

    Names come from three places, in this order: the interpreter's
    built-in modules, a fixed list of platform specific or otherwise
    missed modules, and the .py/.so/.pyd files found by walking the
    stdlib directories listed in sys.path.

    >>> mods = set(list_stdlib_modules())
    >>> 'BaseHTTPServer' in mods
    True

    os.path isn't really a module, so it's missing:

    >>> 'os.path' in mods
    False

    sys requires special treatment, because it's baked into the
    interpreter, but it should still appear:

    >>> 'sys' in mods
    True

    >>> 'collections' in mods
    True

    >>> 'cStringIO' in mods
    True
    """
    for builtin in sys.builtin_module_names:
        yield builtin

    always_stdlib = (
        # These modules only exist on windows, but we should always
        # consider them stdlib.
        'msvcrt', '_winreg',
        # These get missed too
        'ctypes', 'email',
        # python3 only
        'builtins',
        # Unix only
        'fcntl', 'grp', 'pwd', 'termios',
    )
    for extra in always_stdlib:
        yield extra

    roots = set([sys.prefix, sys.exec_prefix])
    # Inside a virtualenv the two prefixes above may not cover the real
    # stdlib location, so add the directories of a few known stdlib
    # modules unless an existing root already covers them.
    for probe in (BaseHTTPServer, zlib):
        try:
            # Not all module objects have a __file__ attribute.
            location = probe.__file__
        except AttributeError:
            continue
        folder = os.path.dirname(location)
        if not any(folder.startswith(r) for r in roots):  # no-py24
            roots.add(folder)

    for libpath in sys.path:
        if 'site-packages' in libpath:
            continue
        # Only walk sys.path entries that live under one of the stdlib
        # roots. check-code suppressed because the ast module used by
        # this script implies the availability of any().
        if not any(libpath.startswith(r) for r in roots):  # no-py24
            continue
        for top, dirs, files in os.walk(libpath):
            for name in files:
                if name == '__init__.py':
                    continue
                if not name.endswith(('.py', '.so', '.pyd')):
                    continue
                full_path = os.path.join(top, name)
                if 'site-packages' in full_path:
                    continue
                # Strip "<libpath>/" to get the path relative to libpath.
                yield dotted_name_of_path(full_path[len(libpath) + 1:])
183 183
# Set of stdlib module names, computed once when this file is loaded;
# verify_stdlib_on_own_line() tests imported names for membership in it.
stdlib_modules = set(list_stdlib_modules())
185 185
def imported_modules(source, modulename, localmods, ignore_nested=False):
    """Given the source of a file as a string, yield the names
    imported by that file.

    Only locally defined modules (those resolvable through
    `localmods`) are reported; anything else is assumed to be
    standard library and skipped.

    Args:
      source: The python source to examine as a string.
      modulename: of specified python source (may have `__init__`)
      localmods: dict of locally defined module names (may have `__init__`)
      ignore_nested: If true, import statements that do not start in
                     column zero will be ignored.

    Returns:
      A generator of `dotted_name_of_path()`-ed names (keys of
      `localmods`) imported by the given source.

    >>> modulename = 'foo.xxx'
    >>> localmods = {'foo.__init__': True,
    ...              'foo.foo1': True, 'foo.foo2': True,
    ...              'foo.bar.__init__': True, 'foo.bar.bar1': True,
    ...              'baz.__init__': True, 'baz.baz1': True }
    >>> # standard library (= not locally defined ones)
    >>> sorted(imported_modules(
    ...        'from stdlib1 import foo, bar; import stdlib2',
    ...        modulename, localmods))
    []
    >>> # relative importing
    >>> sorted(imported_modules(
    ...        'import foo1; from bar import bar1',
    ...        modulename, localmods))
    ['foo.bar.__init__', 'foo.bar.bar1', 'foo.foo1']
    >>> sorted(imported_modules(
    ...        'from bar.bar1 import name1, name2, name3',
    ...        modulename, localmods))
    ['foo.bar.bar1']
    >>> # absolute importing
    >>> sorted(imported_modules(
    ...        'from baz import baz1, name1',
    ...        modulename, localmods))
    ['baz.__init__', 'baz.baz1']
    >>> # mixed importing, even though it shouldn't be recommended
    >>> sorted(imported_modules(
    ...        'import stdlib, foo1, baz',
    ...        modulename, localmods))
    ['baz.__init__', 'foo.foo1']
    >>> # ignore_nested
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', modulename, localmods))
    ['foo.__init__', 'foo.bar.__init__']
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', modulename, localmods, ignore_nested=True))
    ['foo.__init__']
    """
    resolve = fromlocalfunc(modulename, localmods)
    for node in ast.walk(ast.parse(source)):
        if ignore_nested and getattr(node, 'col_offset', 0) > 0:
            continue

        if isinstance(node, ast.Import):
            for alias in node.names:
                hit = resolve(alias.name)
                # No hit means this should be a standard library import.
                if hit:
                    yield hit[1]
            continue

        if not isinstance(node, ast.ImportFrom):
            continue

        hit = resolve(node.module, node.level)
        if not hit:
            # this should import standard library
            continue

        absname, dottedpath, hassubmod = hit
        yield dottedpath
        if hassubmod:
            # The imported names may themselves be sub modules of the
            # package; anything that does not resolve should be a
            # function or a property of "node.module".
            for alias in node.names:
                sub = resolve(absname + '.' + alias.name)
                if sub:
                    yield sub[1]
        # Otherwise examination of "node.names" would be redundant,
        # e.g.: from mercurial.node import nullid, nullrev
274 274
def verify_import_convention(module, source):
    """Check `source` against the import coding convention.

    `module` is accepted but not consulted here. `source` is python
    source text; it is parsed and the tree is handed to
    verify_stdlib_on_own_line(), whose result is returned as is.
    """
    return verify_stdlib_on_own_line(ast.parse(source))
280
def verify_stdlib_on_own_line(root):
    """Given a parsed python source tree, verify that stdlib imports are
    done in separate statements from relative local module imports.

    Yields one message per `import` statement that names both modules
    found in `stdlib_modules` and modules that are not.

    Observing this limitation is important as it works around an
    annoying lib2to3 bug in relative import rewrites:
    http://bugs.python.org/issue19510.

    >>> list(verify_stdlib_on_own_line(ast.parse('import sys, foo')))
    ['mixed imports\\n stdlib: sys\\n relative: foo']
    >>> list(verify_stdlib_on_own_line(ast.parse('import sys, os')))
    []
    >>> list(verify_stdlib_on_own_line(ast.parse('import foo, bar')))
    []
    """
    for node in ast.walk(root):
        if not isinstance(node, ast.Import):
            continue
        stdlib_names = []
        other_names = []
        for alias in node.names:
            if alias.name in stdlib_modules:
                stdlib_names.append(alias.name)
            else:
                other_names.append(alias.name)
        if stdlib_names and other_names:
            yield ('mixed imports\n stdlib: %s\n relative: %s' %
                   (', '.join(sorted(stdlib_names)),
                    ', '.join(sorted(other_names))))
299 305
class CircularImport(Exception):
    """Raised by checkmod() when an import chain returns to its start.

    The first argument is the list of module names forming the chain.
    """
302 308
def checkmod(mod, imports):
    """Search the import graph breadth-first for a cycle through `mod`.

    `imports` maps a module name to the names it imports. Raises
    CircularImport, carrying the chain of module names, as soon as a
    chain starting at `mod` imports `mod` again. Returns None otherwise.
    """
    best = {}
    pending = [[mod]]
    while pending:
        trail = pending.pop(0)
        depth = len(trail)
        for dep in sorted(imports.get(trail[-1], [])):
            # Skip modules already reached by a chain at least as short.
            if depth >= best.get(dep, 1000):
                continue
            best[dep] = depth
            if dep not in trail:
                pending.append(trail + [dep])
            elif dep == trail[0]:
                raise CircularImport(trail)
            # A loop that does not pass through the start is ignored.
316 322
def rotatecycle(cycle):
    """Rotate a cycle so the lexicographically smallest module comes first.

    The smallest module is also appended at the end, closing the loop.

    >>> rotatecycle(['foo', 'bar'])
    ['bar', 'foo', 'bar']
    """
    first = min(cycle)
    at = cycle.index(first)
    head, tail = cycle[at:], cycle[:at]
    return head + tail + [first]
326 332
def find_cycles(imports):
    """Find cycles in an already-loaded import graph.

    All module names recorded in `imports` should be absolute one.

    Returns a set of strings, one per distinct cycle, each formatted as
    "a -> b -> a" starting from the cycle's smallest module name.

    >>> imports = {'top.foo': ['top.bar', 'os.path', 'top.qux'],
    ...            'top.bar': ['top.baz', 'sys'],
    ...            'top.baz': ['top.foo'],
    ...            'top.qux': ['top.foo']}
    >>> print('\\n'.join(sorted(find_cycles(imports))))
    top.bar -> top.baz -> top.foo -> top.bar
    top.foo -> top.qux -> top.foo
    """
    found = set()
    for mod in sorted(imports):
        try:
            checkmod(mod, imports)
        except CircularImport as err:
            # Rotating gives every rotation of a cycle the same text,
            # so the set keeps a single entry for it.
            found.add(" -> ".join(rotatecycle(err.args[0])))
    return found
348 354
349 355 def _cycle_sortkey(c):
350 356 return len(c), c
351 357
def main(argv):
    """Check the import conventions and import cycles of the given files.

    `argv` is a command line: the program name followed by either a list
    of source file paths, or a single '-' meaning that the paths are read
    from stdin, one per line.

    Each problem found is printed to stdout.

    Returns 1 on a usage error. Otherwise returns True if any convention
    violation or import cycle was reported and False if none was, so that
    int() of the result is a conventional process exit status.
    """
    if len(argv) < 2 or (argv[1] == '-' and len(argv) > 2):
        # The message has a %s placeholder for the program name; fill it
        # in instead of printing the raw format string.
        prog = argv[0] if argv else 'import-checker.py'
        print('Usage: %s {-|file [file] [file] ...}' % prog)
        return 1
    if argv[1] == '-':
        argv = argv[:1]
        argv.extend(l.rstrip() for l in sys.stdin.readlines())
    localmods = {}
    used_imports = {}
    any_errors = False
    for source_path in argv[1:]:
        modname = dotted_name_of_path(source_path, trimpure=True)
        localmods[modname] = source_path
    for modname, source_path in sorted(localmods.items()):
        # Read inside "with" so the file is closed even if reading fails.
        with open(source_path) as f:
            src = f.read()
        used_imports[modname] = sorted(
            imported_modules(src, modname, localmods, ignore_nested=True))
        for error in verify_import_convention(modname, src):
            any_errors = True
            print('%s %s' % (source_path, error))
    cycles = find_cycles(used_imports)
    if cycles:
        firstmods = set()
        for c in sorted(cycles, key=_cycle_sortkey):
            first = c.split()[0]
            # As a rough cut, ignore any cycle that starts with the
            # same module as some other cycle. Otherwise we see lots
            # of cycles that are effectively duplicates.
            if first in firstmods:
                continue
            print('Import cycle: %s' % c)
            firstmods.add(first)
        any_errors = True
    # Truthy means failure: the caller turns this into the exit status,
    # so a clean run must yield 0 and a run with errors must yield 1.
    return any_errors
388 394
# Script entry point: run the checker on the command line arguments.
if __name__ == '__main__':
    # main()'s result is converted to int and used as the exit status.
    sys.exit(int(main(sys.argv)))
General Comments 0
You need to be logged in to leave comments. Login now