##// END OF EJS Templates
import-checker: parse python code from .t files
timeless -
r28922:4ec62a08 default
parent child Browse files
Show More
@@ -1,618 +1,706
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2
2
3 from __future__ import absolute_import, print_function
3 from __future__ import absolute_import, print_function
4
4
5 import ast
5 import ast
6 import collections
6 import collections
7 import os
7 import os
8 import re
8 import sys
9 import sys
9
10
10 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
11 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
11 # to work when run from a virtualenv. The modules were chosen empirically
12 # to work when run from a virtualenv. The modules were chosen empirically
12 # so that the return value matches the return value without virtualenv.
13 # so that the return value matches the return value without virtualenv.
13 import BaseHTTPServer
14 import BaseHTTPServer
14 import zlib
15 import zlib
15
16
# Whitelist of modules that symbols can be directly imported from.
# verify_modern_convention() reports a "direct symbol import" for any
# top-level "from X import symbol" whose resolved module is not listed here.
allowsymbolimports = (
    '__future__',
    'mercurial.hgweb.common',
    'mercurial.hgweb.request',
    'mercurial.i18n',
    'mercurial.node',
)
24
25
# Modules that must be aliased because they are commonly confused with
# common variables and can create aliasing and readability issues.
# Maps the imported name to the alias it must be imported "as".
requirealias = {
    'ui': 'uimod',
}
30
31
def usingabsolute(root):
    """Whether absolute imports are being used.

    ``root`` is a parsed ``ast`` tree. On Python 3 this is always True.
    On Python 2 it is True only if the tree contains
    ``from __future__ import absolute_import``.
    """
    if sys.version_info[0] >= 3:
        return True

    for node in ast.walk(root):
        if isinstance(node, ast.ImportFrom):
            if node.module == '__future__':
                for n in node.names:
                    if n.name == 'absolute_import':
                        return True

    return False
44
45
def walklocal(root):
    """Recursively yield all descendant nodes but not in a different scope

    Yields ``(node, newscope)`` pairs, starting with ``(root, False)`` and
    then walking the descendants breadth-first. ``newscope`` is True for
    ``ast.FunctionDef`` nodes: such a node is yielded, but its children are
    not visited, so the caller can decide how to handle the nested scope.
    """
    todo = collections.deque(ast.iter_child_nodes(root))
    yield root, False
    while todo:
        node = todo.popleft()
        newscope = isinstance(node, ast.FunctionDef)
        if not newscope:
            # Only descend into nodes that stay within the current scope.
            todo.extend(ast.iter_child_nodes(node))
        yield node, newscope
55
56
def dotted_name_of_path(path, trimpure=False):
    """Given a relative path to a source file, return its dotted module name.

    If ``trimpure`` is true, path components named ``pure`` are dropped
    from the result.

    >>> dotted_name_of_path('mercurial/error.py')
    'mercurial.error'
    >>> dotted_name_of_path('mercurial/pure/parsers.py', trimpure=True)
    'mercurial.parsers'
    >>> dotted_name_of_path('zlibmodule.so')
    'zlib'
    """
    parts = path.replace(os.sep, '/').split('/')
    parts[-1] = parts[-1].split('.', 1)[0] # remove .py and .so and .ARCH.so
    if parts[-1].endswith('module'):
        # e.g. "zlibmodule" -> "zlib"
        parts[-1] = parts[-1][:-6]
    if trimpure:
        return '.'.join(p for p in parts if p != 'pure')
    return '.'.join(parts)
73
74
def fromlocalfunc(modulename, localmods):
    """Get a function to examine which locally defined module the
    target source imports via a specified name.

    `modulename` is a `dotted_name_of_path()`-ed source file path,
    which may have `.__init__` at the end of it, of the target source.

    `localmods` is a dict (or set), of which key is an absolute
    `dotted_name_of_path()`-ed source file path of locally defined (=
    Mercurial specific) modules.

    This function assumes that module names not existing in
    `localmods` are from the Python standard library.

    This function returns the function, which takes `name` argument,
    and returns `(absname, dottedpath, hassubmod)` tuple if `name`
    matches against locally defined module. Otherwise, it returns
    False.

    It is assumed that `name` doesn't have `.__init__`.

    `absname` is an absolute module name of specified `name`
    (e.g. "hgext.convert"). This can be used to compose prefix for sub
    modules or so.

    `dottedpath` is a `dotted_name_of_path()`-ed source file path
    (e.g. "hgext.convert.__init__") of `name`. This is used to look
    module up in `localmods` again.

    `hassubmod` is whether it may have sub modules under it (for
    convenience, even though this is also equivalent to "absname !=
    dottedpath")

    >>> localmods = {'foo.__init__': True, 'foo.foo1': True,
    ...              'foo.bar.__init__': True, 'foo.bar.bar1': True,
    ...              'baz.__init__': True, 'baz.baz1': True }
    >>> fromlocal = fromlocalfunc('foo.xxx', localmods)
    >>> # relative
    >>> fromlocal('foo1')
    ('foo.foo1', 'foo.foo1', False)
    >>> fromlocal('bar')
    ('foo.bar', 'foo.bar.__init__', True)
    >>> fromlocal('bar.bar1')
    ('foo.bar.bar1', 'foo.bar.bar1', False)
    >>> # absolute
    >>> fromlocal('baz')
    ('baz', 'baz.__init__', True)
    >>> fromlocal('baz.baz1')
    ('baz.baz1', 'baz.baz1', False)
    >>> # unknown = maybe standard library
    >>> fromlocal('os')
    False
    >>> fromlocal(None, 1)
    ('foo', 'foo.__init__', True)
    >>> fromlocal2 = fromlocalfunc('foo.xxx.yyy', localmods)
    >>> fromlocal2(None, 2)
    ('foo', 'foo.__init__', True)
    """
    # Package prefix of the importing module, e.g. "foo." for "foo.xxx".
    prefix = '.'.join(modulename.split('.')[:-1])
    if prefix:
        prefix += '.'
    def fromlocal(name, level=0):
        # name is None when relative imports are used.
        if name is None:
            # If relative imports are used, level must not be absolute.
            assert level > 0
            candidates = ['.'.join(modulename.split('.')[:-level])]
        else:
            # Check relative name first.
            candidates = [prefix + name, name]

        for n in candidates:
            if n in localmods:
                # plain module
                return (n, n, False)
            dottedpath = n + '.__init__'
            if dottedpath in localmods:
                # package
                return (n, dottedpath, True)
        return False
    return fromlocal
153
154
def list_stdlib_modules():
    """List the modules present in the stdlib.

    This is a generator; the same name may be yielded more than once, so
    callers wanting unique names should collect the result into a set.

    >>> mods = set(list_stdlib_modules())
    >>> 'BaseHTTPServer' in mods
    True

    os.path isn't really a module, so it's missing:

    >>> 'os.path' in mods
    False

    sys requires special treatment, because it's baked into the
    interpreter, but it should still appear:

    >>> 'sys' in mods
    True

    >>> 'collections' in mods
    True

    >>> 'cStringIO' in mods
    True
    """
    for m in sys.builtin_module_names:
        yield m
    # These modules only exist on windows, but we should always
    # consider them stdlib.
    for m in ['msvcrt', '_winreg']:
        yield m
    yield 'builtins' # python3 only
    for m in 'fcntl', 'grp', 'pwd', 'termios':  # Unix only
        yield m
    for m in 'cPickle', 'datetime': # in Python (not C) on PyPy
        yield m
    stdlib_prefixes = set([sys.prefix, sys.exec_prefix])
    # We need to supplement the list of prefixes for the search to work
    # when run from within a virtualenv.
    for mod in (BaseHTTPServer, zlib):
        try:
            # Not all module objects have a __file__ attribute.
            filename = mod.__file__
        except AttributeError:
            continue
        dirname = os.path.dirname(filename)
        for prefix in stdlib_prefixes:
            if dirname.startswith(prefix):
                # Then this directory is redundant.
                break
        else:
            # No known prefix covers this directory; search it as well.
            stdlib_prefixes.add(dirname)
    for libpath in sys.path:
        # We want to walk everything in sys.path that starts with
        # something in stdlib_prefixes.
        if not any(libpath.startswith(p) for p in stdlib_prefixes):
            continue
        for top, dirs, files in os.walk(libpath):
            # Prune in place (iterating backwards so deletions keep the
            # remaining indexes valid): skip directories that are not
            # packages, and Mercurial's own top-level packages.
            for i, d in reversed(list(enumerate(dirs))):
                if (not os.path.exists(os.path.join(top, d, '__init__.py'))
                    or top == libpath and d in ('hgext', 'mercurial')):
                    del dirs[i]
            for name in files:
                if not name.endswith(('.py', '.so', '.pyc', '.pyo', '.pyd')):
                    continue
                if name.startswith('__init__.py'):
                    # A package is named after its directory.
                    full_path = top
                else:
                    full_path = os.path.join(top, name)
                rel_path = full_path[len(libpath) + 1:]
                mod = dotted_name_of_path(rel_path)
                yield mod
225
226
# Computed once at import time; membership is tested for every import
# statement examined by verify_modern_convention().
stdlib_modules = set(list_stdlib_modules())
227
228
def imported_modules(source, modulename, f, localmods, ignore_nested=False):
    """Given the source of a file as a string, yield the names
    imported by that file.

    Args:
        source: The python source to examine as a string.
        modulename: of specified python source (may have `__init__`)
        f: file name passed to `ast.parse()` for the given source.
        localmods: dict of locally defined module names (may have `__init__`)
        ignore_nested: If true, import statements that do not start in
                       column zero will be ignored.

    Yields:
        Absolute module names (as keyed in `localmods`) imported by the
        given source. Names not found in `localmods` are skipped.

    >>> f = 'foo/xxx.py'
    >>> modulename = 'foo.xxx'
    >>> localmods = {'foo.__init__': True,
    ...              'foo.foo1': True, 'foo.foo2': True,
    ...              'foo.bar.__init__': True, 'foo.bar.bar1': True,
    ...              'baz.__init__': True, 'baz.baz1': True }
    >>> # standard library (= not locally defined ones)
    >>> sorted(imported_modules(
    ...        'from stdlib1 import foo, bar; import stdlib2',
    ...        modulename, f, localmods))
    []
    >>> # relative importing
    >>> sorted(imported_modules(
    ...        'import foo1; from bar import bar1',
    ...        modulename, f, localmods))
    ['foo.bar.bar1', 'foo.foo1']
    >>> sorted(imported_modules(
    ...        'from bar.bar1 import name1, name2, name3',
    ...        modulename, f, localmods))
    ['foo.bar.bar1']
    >>> # absolute importing
    >>> sorted(imported_modules(
    ...        'from baz import baz1, name1',
    ...        modulename, f, localmods))
    ['baz.__init__', 'baz.baz1']
    >>> # mixed importing, even though it shouldn't be recommended
    >>> sorted(imported_modules(
    ...        'import stdlib, foo1, baz',
    ...        modulename, f, localmods))
    ['baz.__init__', 'foo.foo1']
    >>> # ignore_nested
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', modulename, f, localmods))
    ['foo.__init__', 'foo.bar.__init__']
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', modulename, f, localmods, ignore_nested=True))
    ['foo.__init__']
    """
    fromlocal = fromlocalfunc(modulename, localmods)
    for node in ast.walk(ast.parse(source, f)):
        if ignore_nested and getattr(node, 'col_offset', 0) > 0:
            continue
        if isinstance(node, ast.Import):
            for n in node.names:
                found = fromlocal(n.name)
                if not found:
                    # this should import standard library
                    continue
                yield found[1]
        elif isinstance(node, ast.ImportFrom):
            found = fromlocal(node.module, node.level)
            if not found:
                # this should import standard library
                continue

            absname, dottedpath, hassubmod = found
            if not hassubmod:
                # "dottedpath" is not a package; must be imported
                yield dottedpath
                # examination of "node.names" should be redundant
                # e.g.: from mercurial.node import nullid, nullrev
                continue

            modnotfound = False
            prefix = absname + '.'
            for n in node.names:
                found = fromlocal(prefix + n.name)
                if not found:
                    # this should be a function or a property of "node.module"
                    modnotfound = True
                    continue
                yield found[1]
            if modnotfound:
                # "dottedpath" is a package, but imported because of non-module
                # lookup
                yield dottedpath
324
325
def verify_import_convention(module, source, localmods):
    """Verify imports match our established coding convention.

    We have 2 conventions: legacy and modern. The modern convention is in
    effect when using absolute imports.

    The legacy convention only looks for mixed imports. The modern convention
    is much more thorough.

    `module` is the dotted name of the module being checked, `source` its
    python source as a string, and `localmods` the collection of locally
    defined module names. The result of the selected checker is returned
    as is.
    """
    root = ast.parse(source)
    absolute = usingabsolute(root)

    if absolute:
        return verify_modern_convention(module, root, localmods)
    else:
        return verify_stdlib_on_own_line(root)
341
342
def verify_modern_convention(module, root, localmods, root_col_offset=0):
    """Verify a file conforms to the modern import convention rules.

    The rules of the modern convention are:

    * Ordering is stdlib followed by local imports. Each group is lexically
      sorted.
    * Importing multiple modules via "import X, Y" is not allowed: use
      separate import statements.
    * Importing multiple modules via "from X import ..." is allowed if using
      parenthesis and one entry per line.
    * Only 1 relative import statement per import level ("from .", "from ..")
      is allowed.
    * Relative imports from higher levels must occur before lower levels. e.g.
      "from .." must be before "from .".
    * Imports from peer packages should use relative import (e.g. do not
      "import mercurial.foo" from a "mercurial.*" module).
    * Symbols can only be imported from specific modules (see
      `allowsymbolimports`). For other modules, first import the module then
      assign the symbol to a module-level variable. In addition, these imports
      must be performed before other relative imports. This rule only
      applies to import statements outside of any blocks.
    * Relative imports from the standard library are not allowed.
    * Certain modules must be aliased to alternate names to avoid aliasing
      and readability problems. See `requirealias`.

    `module` is the dotted name of the module being checked, `root` the
    ast node to examine (a whole module, or a function definition when
    called recursively), and `localmods` the collection of locally defined
    module names. `root_col_offset` is the column at which statements
    directly inside `root` start; the ordering rules are only applied to
    imports at exactly that column.

    This is a generator of `(message, lineno)` tuples, one per violation.
    """
    topmodule = module.split('.')[0]
    fromlocal = fromlocalfunc(module, localmods)

    # Whether a local/non-stdlib import has been performed.
    seenlocal = None
    # Whether a relative, non-symbol import has been seen.
    seennonsymbolrelative = False
    # The last name to be imported (for sorting).
    lastname = None
    # Relative import levels encountered so far.
    seenlevels = set()

    for node, newscope in walklocal(root):
        def msg(fmt, *args):
            return (fmt % args, node.lineno)
        if newscope:
            # Check for local imports in function
            for r in verify_modern_convention(module, node, localmods,
                                              node.col_offset + 4):
                yield r
        elif isinstance(node, ast.Import):
            # Disallow "import foo, bar" and require separate imports
            # for each module.
            if len(node.names) > 1:
                yield msg('multiple imported names: %s',
                          ', '.join(n.name for n in node.names))

            name = node.names[0].name
            asname = node.names[0].asname

            # Ignore sorting rules on imports inside blocks.
            if node.col_offset == root_col_offset:
                if lastname and name < lastname:
                    yield msg('imports not lexically sorted: %s < %s',
                              name, lastname)

                lastname = name

            # stdlib imports should be before local imports.
            stdlib = name in stdlib_modules
            if stdlib and seenlocal and node.col_offset == root_col_offset:
                yield msg('stdlib import "%s" follows local import: %s',
                          name, seenlocal)

            if not stdlib:
                seenlocal = name

            # Import of sibling modules should use relative imports.
            topname = name.split('.')[0]
            if topname == topmodule:
                yield msg('import should be relative: %s', name)

            if name in requirealias and asname != requirealias[name]:
                yield msg('%s module must be "as" aliased to %s',
                          name, requirealias[name])

        elif isinstance(node, ast.ImportFrom):
            # Resolve the full imported module name.
            if node.level > 0:
                fullname = '.'.join(module.split('.')[:-node.level])
                if node.module:
                    fullname += '.%s' % node.module
            else:
                assert node.module
                fullname = node.module

                # Only absolute imports can violate this rule: a relative
                # import always resolves under the module's own top-level
                # package.
                topname = fullname.split('.')[0]
                if topname == topmodule:
                    yield msg('import should be relative: %s', fullname)

            # __future__ is special since it needs to come first and use
            # symbol import.
            if fullname != '__future__':
                if not fullname or fullname in stdlib_modules:
                    yield msg('relative import of stdlib module')
                else:
                    seenlocal = fullname

            # Direct symbol import is only allowed from certain modules and
            # must occur before non-symbol imports.
            if node.module and node.col_offset == root_col_offset:
                found = fromlocal(node.module, node.level)
                if found and found[2]:  # node.module is a package
                    prefix = found[0] + '.'
                    # Names resolving to local submodules are module
                    # imports, not symbol imports.
                    symbols = [n.name for n in node.names
                               if not fromlocal(prefix + n.name)]
                else:
                    symbols = [n.name for n in node.names]

                if symbols and fullname not in allowsymbolimports:
                    yield msg('direct symbol import %s from %s',
                              ', '.join(symbols), fullname)

                if symbols and seennonsymbolrelative:
                    yield msg('symbol import follows non-symbol import: %s',
                              fullname)

            if not node.module:
                # "from . import x" / "from .. import x"
                assert node.level
                seennonsymbolrelative = True

                # Only allow 1 group per level.
                if (node.level in seenlevels
                    and node.col_offset == root_col_offset):
                    yield msg('multiple "from %s import" statements',
                              '.' * node.level)

                # Higher-level groups come before lower-level groups.
                if any(node.level > l for l in seenlevels):
                    yield msg('higher-level import should come first: %s',
                              fullname)

                seenlevels.add(node.level)

            # Entries in "from .X import ( ... )" lists must be lexically
            # sorted.
            lastentryname = None

            for n in node.names:
                if lastentryname and n.name < lastentryname:
                    yield msg('imports from %s not lexically sorted: %s < %s',
                              fullname, n.name, lastentryname)

                lastentryname = n.name

                if n.name in requirealias and n.asname != requirealias[n.name]:
                    yield msg('%s from %s must be "as" aliased to %s',
                              n.name, fullname, requirealias[n.name])
496
497
def verify_stdlib_on_own_line(root):
    """Report plain ``import`` statements mixing stdlib and other modules.

    Walks every node of the parsed tree ``root``.  For each ``import a, b``
    statement that names both modules found in ``stdlib_modules`` and
    modules that are not, yields a ``(message, lineno)`` pair.

    >>> list(verify_stdlib_on_own_line(ast.parse('import sys, foo')))
    [('mixed imports\\n stdlib: sys\\n relative: foo', 1)]
    >>> list(verify_stdlib_on_own_line(ast.parse('import sys, os')))
    []
    >>> list(verify_stdlib_on_own_line(ast.parse('import foo, bar')))
    []
    """
    for stmt in ast.walk(root):
        if not isinstance(stmt, ast.Import):
            continue
        names = [alias.name for alias in stmt.names]
        stdlib = sorted(name for name in names if name in stdlib_modules)
        others = sorted(name for name in names if name not in stdlib_modules)
        if not stdlib or not others:
            # Homogeneous statement: nothing to report.
            continue
        report = 'mixed imports\n stdlib: %s\n relative: %s' % (
            ', '.join(stdlib), ', '.join(others))
        yield report, stmt.lineno
517
518
class CircularImport(Exception):
    """Raised by checkmod() when an import path leads back to its start.

    The offending path (a list of module names) is passed as the only
    exception argument.
    """
520
521
def checkmod(mod, imports):
    """Breadth-first search of the import graph for a cycle through ``mod``.

    mod: name of the module to start from.
    imports: dict mapping a module name to an iterable of the module names
    it imports; modules missing from the dict are treated as importing
    nothing.

    Returns None when no import path leads back to ``mod``.  Raises
    CircularImport carrying the path (a list of module names starting at
    ``mod``) whose last module imports ``mod`` again.
    """
    # Length of the shortest path at which each module has been reached.
    # A module is only processed again when reached by a strictly shorter
    # path.  Unseen modules default to 1000, so paths of that length or
    # more are not extended.
    shortest = {}
    # deque gives O(1) pops from the front of the FIFO work queue
    # (list.pop(0) shifts every remaining element).
    visit = collections.deque([[mod]])
    while visit:
        path = visit.popleft()
        for i in sorted(imports.get(path[-1], [])):
            if len(path) < shortest.get(i, 1000):
                shortest[i] = len(path)
                if i in path:
                    if i == path[0]:
                        raise CircularImport(path)
                    # A loop that does not pass through ``mod`` is
                    # reported when its own members are checked.
                    continue
                visit.append(path + [i])
534
535
def rotatecycle(cycle):
    """Rotate a cycle to start at its lowest module and close the loop

    The returned list begins with the lexicographically lowest entry of
    ``cycle``, keeps the other entries in their cyclic order, and repeats
    that lowest entry at the end.

    >>> rotatecycle(['foo', 'bar'])
    ['bar', 'foo', 'bar']
    """
    start = cycle.index(min(cycle))
    rotated = cycle[start:] + cycle[:start]
    return rotated + [rotated[0]]
544
545
def find_cycles(imports):
    """Collect the import cycles present in a loaded import graph.

    ``imports`` maps absolute module names to the absolute names of the
    modules they import.  Each key is checked with checkmod(); every cycle
    found is rotated with rotatecycle() and rendered as an
    ``'a -> b -> a'`` string.  Returns the set of those strings.

    >>> from __future__ import print_function
    >>> imports = {'top.foo': ['top.bar', 'os.path', 'top.qux'],
    ...            'top.bar': ['top.baz', 'sys'],
    ...            'top.baz': ['top.foo'],
    ...            'top.qux': ['top.foo']}
    >>> print('\\n'.join(sorted(find_cycles(imports))))
    top.bar -> top.baz -> top.foo -> top.bar
    top.foo -> top.qux -> top.foo
    """
    found = set()
    for name in sorted(imports):
        try:
            checkmod(name, imports)
        except CircularImport as exc:
            # The exception's first argument is the looping path.
            found.add(" -> ".join(rotatecycle(exc.args[0])))
    return found
567
568
568 def _cycle_sortkey(c):
569 def _cycle_sortkey(c):
569 return len(c), c
570 return len(c), c
570
571
def embedded(f, modname, src):
    """Extract embedded python code

    f: name of the file being scanned; only used in the "Parse Error"
    message.
    modname: module name used to label the extracted scripts.
    src: iterable of text lines, with or without a trailing newline.

    Yields (script, "modname[line]", filename, line) tuples, where line is
    the 1-based line of ``src`` on which the script started.  Two kinds of
    script are recognized:

    - doctest-style blocks ('  >>> ' / '  ... ' lines), reported with the
      filename 'doctest.py' and ended by a line holding only two spaces,
      by the start of a heredoc, or by the end of the input;
    - '$ cat > NAME.py <<EOF' heredocs, reported under NAME.py and ended
      by a '  > EOF' line.  A heredoc that is never terminated is not
      reported.

    >>> def test(fn, lines):
    ...     for s, m, f, l in embedded(fn, "example", lines):
    ...         print("%s %s %s" % (m, f, l))
    ...         print(repr(s))
    >>> lines = [
    ...   'comment',
    ...   '  >>> from __future__ import print_function',
    ...   "  >>> ' multiline",
    ...   "  ... string'",
    ...   '  ',
    ...   'comment',
    ...   '  $ cat > foo.py <<EOF',
    ...   '  > from __future__ import print_function',
    ...   '  > EOF',
    ...   ]
    >>> test("example.t", lines)
    example[2] doctest.py 2
    "from __future__ import print_function\\n' multiline\\nstring'\\n"
    example[7] foo.py 7
    'from __future__ import print_function\\n'
    """
    # Compiled once instead of being looked up again for every line.
    catre = re.compile(r"\$ \s*cat\s*>\s*(\S+\.py)\s*<<\s*EOF")

    def entry(lines, start, name):
        # Build one result tuple for the script that began at line ``start``.
        return ''.join(lines), "%s[%d]" % (modname, start), name, start

    inlinepython = 0 # line where the open doctest block began, 0 if none
    shpython = 0 # line of the open "cat > X.py <<EOF", 0 if none
    script = []
    prefix = 6 # len('  >>> ') == len('  ... ')
    t = ''
    for n, l in enumerate(src, 1):
        if not l.endswith('\n'):
            l += '\n'
        if l.startswith('  >>> '): # python inlines
            if shpython:
                print("%s:%d: Parse Error" % (f, n))
            if not inlinepython:
                # We've just entered a Python block.
                inlinepython = n
                t = 'doctest.py'
            script.append(l[prefix:])
            continue
        if l.startswith('  ... '): # python inlines
            script.append(l[prefix:])
            continue
        cat = catre.search(l)
        if cat:
            if inlinepython:
                # A heredoc implicitly ends the doctest block before it.
                yield entry(script, inlinepython, t)
                script = []
                inlinepython = 0
            shpython = n
            t = cat.group(1)
            continue
        if shpython and l.startswith('  > '): # sh continuation
            if l == '  > EOF\n':
                yield entry(script, shpython, t)
                script = []
                shpython = 0
            else:
                # Strip the 4-character '  > ' continuation marker.
                script.append(l[4:])
            continue
        if inlinepython and l == '  \n':
            yield entry(script, inlinepython, t)
            script = []
            inlinepython = 0
            continue
    if inlinepython:
        # The input ended inside a doctest block; report it rather than
        # silently dropping it.
        yield entry(script, inlinepython, t)
643
def sources(f, modname):
    """Yields possibly multiple sources from a filepath

    input: filepath, modulename
    yields: script(string), modulename, filepath, linenumber

    A '.py' file is first yielded whole, with linenumber 0.  '.py' and
    '.t' files are then scanned with embedded() and every script found is
    yielded as well.  Files with any other extension yield nothing.

    For embedded scripts, the modulename and filepath will be different
    from the function arguments. linenumber is an offset relative to
    the input file.
    """
    ispy = f.endswith('.py')
    if ispy:
        # Read and close the file before yielding so the handle is not
        # held open while the caller processes the source.
        with open(f) as src:
            text = src.read()
        yield text, modname, f, 0
    if ispy or f.endswith('.t'):
        with open(f) as src:
            # embedded() reads lazily, so the file stays open while its
            # results are consumed.
            for script, embeddedname, t, line in embedded(f, modname, src):
                yield script, embeddedname, t, line
575
663
def main(argv):
    """Check import conventions and import cycles for the given files.

    argv: command line; argv[0] is the program name, followed either by
    file paths or by a single '-' to read one path per line from stdin.

    Every convention violation and import cycle is printed.  Syntax errors
    in a source are printed too, but are not counted as errors.

    Returns 1 after printing the usage message, True when a convention
    violation or an import cycle was found, False otherwise.
    """
    if len(argv) < 2 or (argv[1] == '-' and len(argv) > 2):
        # Fall back to the interpreter's own argv when called with an
        # empty list, so the message always names a program.
        prog = argv[0] if argv else sys.argv[0]
        print('Usage: %s {-|file [file] [file] ...}' % prog)
        return 1
    if argv[1] == '-':
        argv = argv[:1]
        argv.extend(l.rstrip() for l in sys.stdin.readlines())
    localmods = {}
    used_imports = {}
    any_errors = False
    for source_path in argv[1:]:
        modname = dotted_name_of_path(source_path, trimpure=True)
        localmods[modname] = source_path
    for localmodname, source_path in sorted(localmods.items()):
        for src, modname, name, line in sources(source_path, localmodname):
            try:
                used_imports[modname] = sorted(
                    imported_modules(src, modname, name, localmods,
                                     ignore_nested=True))
                for error, lineno in verify_import_convention(modname, src,
                                                              localmods):
                    any_errors = True
                    # ``line`` is the offset of an embedded script within
                    # source_path (0 for a whole .py file).
                    print('%s:%d: %s' % (source_path, lineno + line, error))
            except SyntaxError as e:
                # SyntaxError.lineno can be None; treat that as line 0
                # instead of failing while formatting the report.
                print('%s:%d: SyntaxError: %s' %
                      (source_path, (e.lineno or 0) + line, e))
    cycles = find_cycles(used_imports)
    if cycles:
        firstmods = set()
        for c in sorted(cycles, key=_cycle_sortkey):
            first = c.split()[0]
            # As a rough cut, ignore any cycle that starts with the
            # same module as some other cycle. Otherwise we see lots
            # of cycles that are effectively duplicates.
            if first in firstmods:
                continue
            print('Import cycle:', c)
            firstmods.add(first)
        any_errors = True
    return any_errors != 0
616
704
if __name__ == '__main__':
    # main() returns a bool or an int; normalize it to an exit status.
    exitcode = int(main(sys.argv))
    sys.exit(exitcode)
General Comments 0
You need to be logged in to leave comments. Login now