##// END OF EJS Templates
policy: eliminate ".pure." from module name only if marked as dual...
Yuya Nishihara -
r32207:65cd7e70 default
parent child Browse files
Show More
@@ -1,95 +1,107 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 #
2 #
3 # check-py3-compat - check Python 3 compatibility of Mercurial files
3 # check-py3-compat - check Python 3 compatibility of Mercurial files
4 #
4 #
5 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
5 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9
9
10 from __future__ import absolute_import, print_function
10 from __future__ import absolute_import, print_function
11
11
12 import ast
12 import ast
13 import os
13 import os
14 import sys
14 import sys
15 import traceback
15 import traceback
16
16
17 # Modules that have both Python and C implementations.
18 _dualmodules = (
19 'base85.py',
20 'bdiff.py',
21 'diffhelpers.py',
22 'mpatch.py',
23 'osutil.py',
24 'parsers.py',
25 )
26
17 def check_compat_py2(f):
27 def check_compat_py2(f):
18 """Check Python 3 compatibility for a file with Python 2"""
28 """Check Python 3 compatibility for a file with Python 2"""
19 with open(f, 'rb') as fh:
29 with open(f, 'rb') as fh:
20 content = fh.read()
30 content = fh.read()
21 root = ast.parse(content)
31 root = ast.parse(content)
22
32
23 # Ignore empty files.
33 # Ignore empty files.
24 if not root.body:
34 if not root.body:
25 return
35 return
26
36
27 futures = set()
37 futures = set()
28 haveprint = False
38 haveprint = False
29 for node in ast.walk(root):
39 for node in ast.walk(root):
30 if isinstance(node, ast.ImportFrom):
40 if isinstance(node, ast.ImportFrom):
31 if node.module == '__future__':
41 if node.module == '__future__':
32 futures |= set(n.name for n in node.names)
42 futures |= set(n.name for n in node.names)
33 elif isinstance(node, ast.Print):
43 elif isinstance(node, ast.Print):
34 haveprint = True
44 haveprint = True
35
45
36 if 'absolute_import' not in futures:
46 if 'absolute_import' not in futures:
37 print('%s not using absolute_import' % f)
47 print('%s not using absolute_import' % f)
38 if haveprint and 'print_function' not in futures:
48 if haveprint and 'print_function' not in futures:
39 print('%s requires print_function' % f)
49 print('%s requires print_function' % f)
40
50
41 def check_compat_py3(f):
51 def check_compat_py3(f):
42 """Check Python 3 compatibility of a file with Python 3."""
52 """Check Python 3 compatibility of a file with Python 3."""
43 import importlib # not available on Python 2.6
53 import importlib # not available on Python 2.6
44 with open(f, 'rb') as fh:
54 with open(f, 'rb') as fh:
45 content = fh.read()
55 content = fh.read()
46
56
47 try:
57 try:
48 ast.parse(content)
58 ast.parse(content)
49 except SyntaxError as e:
59 except SyntaxError as e:
50 print('%s: invalid syntax: %s' % (f, e))
60 print('%s: invalid syntax: %s' % (f, e))
51 return
61 return
52
62
53 # Try to import the module.
63 # Try to import the module.
54 # For now we only support mercurial.* and hgext.* modules because figuring
64 # For now we only support mercurial.* and hgext.* modules because figuring
55 # out module paths for things not in a package can be confusing.
65 # out module paths for things not in a package can be confusing.
56 if f.startswith(('hgext/', 'mercurial/')) and not f.endswith('__init__.py'):
66 if f.startswith(('hgext/', 'mercurial/')) and not f.endswith('__init__.py'):
57 assert f.endswith('.py')
67 assert f.endswith('.py')
58 name = f.replace('/', '.')[:-3].replace('.pure.', '.')
68 name = f.replace('/', '.')[:-3]
69 if f.endswith(_dualmodules):
70 name = name.replace('.pure.', '.')
59 try:
71 try:
60 importlib.import_module(name)
72 importlib.import_module(name)
61 except Exception as e:
73 except Exception as e:
62 exc_type, exc_value, tb = sys.exc_info()
74 exc_type, exc_value, tb = sys.exc_info()
63 # We walk the stack and ignore frames from our custom importer,
75 # We walk the stack and ignore frames from our custom importer,
64 # import mechanisms, and stdlib modules. This kinda/sorta
76 # import mechanisms, and stdlib modules. This kinda/sorta
65 # emulates CPython behavior in import.c while also attempting
77 # emulates CPython behavior in import.c while also attempting
66 # to pin blame on a Mercurial file.
78 # to pin blame on a Mercurial file.
67 for frame in reversed(traceback.extract_tb(tb)):
79 for frame in reversed(traceback.extract_tb(tb)):
68 if frame.name == '_call_with_frames_removed':
80 if frame.name == '_call_with_frames_removed':
69 continue
81 continue
70 if 'importlib' in frame.filename:
82 if 'importlib' in frame.filename:
71 continue
83 continue
72 if 'mercurial/__init__.py' in frame.filename:
84 if 'mercurial/__init__.py' in frame.filename:
73 continue
85 continue
74 if frame.filename.startswith(sys.prefix):
86 if frame.filename.startswith(sys.prefix):
75 continue
87 continue
76 break
88 break
77
89
78 if frame.filename:
90 if frame.filename:
79 filename = os.path.basename(frame.filename)
91 filename = os.path.basename(frame.filename)
80 print('%s: error importing: <%s> %s (error at %s:%d)' % (
92 print('%s: error importing: <%s> %s (error at %s:%d)' % (
81 f, type(e).__name__, e, filename, frame.lineno))
93 f, type(e).__name__, e, filename, frame.lineno))
82 else:
94 else:
83 print('%s: error importing module: <%s> %s (line %d)' % (
95 print('%s: error importing module: <%s> %s (line %d)' % (
84 f, type(e).__name__, e, frame.lineno))
96 f, type(e).__name__, e, frame.lineno))
85
97
86 if __name__ == '__main__':
98 if __name__ == '__main__':
87 if sys.version_info[0] == 2:
99 if sys.version_info[0] == 2:
88 fn = check_compat_py2
100 fn = check_compat_py2
89 else:
101 else:
90 fn = check_compat_py3
102 fn = check_compat_py3
91
103
92 for f in sys.argv[1:]:
104 for f in sys.argv[1:]:
93 fn(f)
105 fn(f)
94
106
95 sys.exit(0)
107 sys.exit(0)
@@ -1,725 +1,736 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2
2
3 from __future__ import absolute_import, print_function
3 from __future__ import absolute_import, print_function
4
4
5 import ast
5 import ast
6 import collections
6 import collections
7 import os
7 import os
8 import re
8 import re
9 import sys
9 import sys
10
10
11 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
11 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
12 # to work when run from a virtualenv. The modules were chosen empirically
12 # to work when run from a virtualenv. The modules were chosen empirically
13 # so that the return value matches the return value without virtualenv.
13 # so that the return value matches the return value without virtualenv.
14 if True: # disable lexical sorting checks
14 if True: # disable lexical sorting checks
15 import BaseHTTPServer
15 import BaseHTTPServer
16 import zlib
16 import zlib
17
17
18 # Whitelist of modules that symbols can be directly imported from.
18 # Whitelist of modules that symbols can be directly imported from.
19 allowsymbolimports = (
19 allowsymbolimports = (
20 '__future__',
20 '__future__',
21 'mercurial.hgweb.common',
21 'mercurial.hgweb.common',
22 'mercurial.hgweb.request',
22 'mercurial.hgweb.request',
23 'mercurial.i18n',
23 'mercurial.i18n',
24 'mercurial.node',
24 'mercurial.node',
25 )
25 )
26
26
27 # Modules that have both Python and C implementations.
28 _dualmodules = (
29 'base85.py',
30 'bdiff.py',
31 'diffhelpers.py',
32 'mpatch.py',
33 'osutil.py',
34 'parsers.py',
35 )
36
27 # Modules that must be aliased because they are commonly confused with
37 # Modules that must be aliased because they are commonly confused with
28 # common variables and can create aliasing and readability issues.
38 # common variables and can create aliasing and readability issues.
29 requirealias = {
39 requirealias = {
30 'ui': 'uimod',
40 'ui': 'uimod',
31 }
41 }
32
42
33 def usingabsolute(root):
43 def usingabsolute(root):
34 """Whether absolute imports are being used."""
44 """Whether absolute imports are being used."""
35 if sys.version_info[0] >= 3:
45 if sys.version_info[0] >= 3:
36 return True
46 return True
37
47
38 for node in ast.walk(root):
48 for node in ast.walk(root):
39 if isinstance(node, ast.ImportFrom):
49 if isinstance(node, ast.ImportFrom):
40 if node.module == '__future__':
50 if node.module == '__future__':
41 for n in node.names:
51 for n in node.names:
42 if n.name == 'absolute_import':
52 if n.name == 'absolute_import':
43 return True
53 return True
44
54
45 return False
55 return False
46
56
47 def walklocal(root):
57 def walklocal(root):
48 """Recursively yield all descendant nodes but not in a different scope"""
58 """Recursively yield all descendant nodes but not in a different scope"""
49 todo = collections.deque(ast.iter_child_nodes(root))
59 todo = collections.deque(ast.iter_child_nodes(root))
50 yield root, False
60 yield root, False
51 while todo:
61 while todo:
52 node = todo.popleft()
62 node = todo.popleft()
53 newscope = isinstance(node, ast.FunctionDef)
63 newscope = isinstance(node, ast.FunctionDef)
54 if not newscope:
64 if not newscope:
55 todo.extend(ast.iter_child_nodes(node))
65 todo.extend(ast.iter_child_nodes(node))
56 yield node, newscope
66 yield node, newscope
57
67
58 def dotted_name_of_path(path, trimpure=False):
68 def dotted_name_of_path(path, trimpure=False):
59 """Given a relative path to a source file, return its dotted module name.
69 """Given a relative path to a source file, return its dotted module name.
60
70
61 >>> dotted_name_of_path('mercurial/error.py')
71 >>> dotted_name_of_path('mercurial/error.py')
62 'mercurial.error'
72 'mercurial.error'
63 >>> dotted_name_of_path('mercurial/pure/parsers.py', trimpure=True)
73 >>> dotted_name_of_path('mercurial/pure/parsers.py', trimpure=True)
64 'mercurial.parsers'
74 'mercurial.parsers'
65 >>> dotted_name_of_path('zlibmodule.so')
75 >>> dotted_name_of_path('zlibmodule.so')
66 'zlib'
76 'zlib'
67 """
77 """
68 parts = path.replace(os.sep, '/').split('/')
78 parts = path.replace(os.sep, '/').split('/')
69 parts[-1] = parts[-1].split('.', 1)[0] # remove .py and .so and .ARCH.so
79 parts[-1] = parts[-1].split('.', 1)[0] # remove .py and .so and .ARCH.so
70 if parts[-1].endswith('module'):
80 if parts[-1].endswith('module'):
71 parts[-1] = parts[-1][:-6]
81 parts[-1] = parts[-1][:-6]
72 if trimpure:
82 if trimpure:
73 return '.'.join(p for p in parts if p != 'pure')
83 return '.'.join(p for p in parts if p != 'pure')
74 return '.'.join(parts)
84 return '.'.join(parts)
75
85
76 def fromlocalfunc(modulename, localmods):
86 def fromlocalfunc(modulename, localmods):
77 """Get a function to examine which locally defined module the
87 """Get a function to examine which locally defined module the
78 target source imports via a specified name.
88 target source imports via a specified name.
79
89
80 `modulename` is an `dotted_name_of_path()`-ed source file path,
90 `modulename` is an `dotted_name_of_path()`-ed source file path,
81 which may have `.__init__` at the end of it, of the target source.
91 which may have `.__init__` at the end of it, of the target source.
82
92
83 `localmods` is a dict (or set), of which key is an absolute
93 `localmods` is a dict (or set), of which key is an absolute
84 `dotted_name_of_path()`-ed source file path of locally defined (=
94 `dotted_name_of_path()`-ed source file path of locally defined (=
85 Mercurial specific) modules.
95 Mercurial specific) modules.
86
96
87 This function assumes that module names not existing in
97 This function assumes that module names not existing in
88 `localmods` are from the Python standard library.
98 `localmods` are from the Python standard library.
89
99
90 This function returns the function, which takes `name` argument,
100 This function returns the function, which takes `name` argument,
91 and returns `(absname, dottedpath, hassubmod)` tuple if `name`
101 and returns `(absname, dottedpath, hassubmod)` tuple if `name`
92 matches against locally defined module. Otherwise, it returns
102 matches against locally defined module. Otherwise, it returns
93 False.
103 False.
94
104
95 It is assumed that `name` doesn't have `.__init__`.
105 It is assumed that `name` doesn't have `.__init__`.
96
106
97 `absname` is an absolute module name of specified `name`
107 `absname` is an absolute module name of specified `name`
98 (e.g. "hgext.convert"). This can be used to compose prefix for sub
108 (e.g. "hgext.convert"). This can be used to compose prefix for sub
99 modules or so.
109 modules or so.
100
110
101 `dottedpath` is a `dotted_name_of_path()`-ed source file path
111 `dottedpath` is a `dotted_name_of_path()`-ed source file path
102 (e.g. "hgext.convert.__init__") of `name`. This is used to look
112 (e.g. "hgext.convert.__init__") of `name`. This is used to look
103 module up in `localmods` again.
113 module up in `localmods` again.
104
114
105 `hassubmod` is whether it may have sub modules under it (for
115 `hassubmod` is whether it may have sub modules under it (for
106 convenient, even though this is also equivalent to "absname !=
116 convenient, even though this is also equivalent to "absname !=
107 dottednpath")
117 dottednpath")
108
118
109 >>> localmods = {'foo.__init__': True, 'foo.foo1': True,
119 >>> localmods = {'foo.__init__': True, 'foo.foo1': True,
110 ... 'foo.bar.__init__': True, 'foo.bar.bar1': True,
120 ... 'foo.bar.__init__': True, 'foo.bar.bar1': True,
111 ... 'baz.__init__': True, 'baz.baz1': True }
121 ... 'baz.__init__': True, 'baz.baz1': True }
112 >>> fromlocal = fromlocalfunc('foo.xxx', localmods)
122 >>> fromlocal = fromlocalfunc('foo.xxx', localmods)
113 >>> # relative
123 >>> # relative
114 >>> fromlocal('foo1')
124 >>> fromlocal('foo1')
115 ('foo.foo1', 'foo.foo1', False)
125 ('foo.foo1', 'foo.foo1', False)
116 >>> fromlocal('bar')
126 >>> fromlocal('bar')
117 ('foo.bar', 'foo.bar.__init__', True)
127 ('foo.bar', 'foo.bar.__init__', True)
118 >>> fromlocal('bar.bar1')
128 >>> fromlocal('bar.bar1')
119 ('foo.bar.bar1', 'foo.bar.bar1', False)
129 ('foo.bar.bar1', 'foo.bar.bar1', False)
120 >>> # absolute
130 >>> # absolute
121 >>> fromlocal('baz')
131 >>> fromlocal('baz')
122 ('baz', 'baz.__init__', True)
132 ('baz', 'baz.__init__', True)
123 >>> fromlocal('baz.baz1')
133 >>> fromlocal('baz.baz1')
124 ('baz.baz1', 'baz.baz1', False)
134 ('baz.baz1', 'baz.baz1', False)
125 >>> # unknown = maybe standard library
135 >>> # unknown = maybe standard library
126 >>> fromlocal('os')
136 >>> fromlocal('os')
127 False
137 False
128 >>> fromlocal(None, 1)
138 >>> fromlocal(None, 1)
129 ('foo', 'foo.__init__', True)
139 ('foo', 'foo.__init__', True)
130 >>> fromlocal('foo1', 1)
140 >>> fromlocal('foo1', 1)
131 ('foo.foo1', 'foo.foo1', False)
141 ('foo.foo1', 'foo.foo1', False)
132 >>> fromlocal2 = fromlocalfunc('foo.xxx.yyy', localmods)
142 >>> fromlocal2 = fromlocalfunc('foo.xxx.yyy', localmods)
133 >>> fromlocal2(None, 2)
143 >>> fromlocal2(None, 2)
134 ('foo', 'foo.__init__', True)
144 ('foo', 'foo.__init__', True)
135 >>> fromlocal2('bar2', 1)
145 >>> fromlocal2('bar2', 1)
136 False
146 False
137 >>> fromlocal2('bar', 2)
147 >>> fromlocal2('bar', 2)
138 ('foo.bar', 'foo.bar.__init__', True)
148 ('foo.bar', 'foo.bar.__init__', True)
139 """
149 """
140 prefix = '.'.join(modulename.split('.')[:-1])
150 prefix = '.'.join(modulename.split('.')[:-1])
141 if prefix:
151 if prefix:
142 prefix += '.'
152 prefix += '.'
143 def fromlocal(name, level=0):
153 def fromlocal(name, level=0):
144 # name is false value when relative imports are used.
154 # name is false value when relative imports are used.
145 if not name:
155 if not name:
146 # If relative imports are used, level must not be absolute.
156 # If relative imports are used, level must not be absolute.
147 assert level > 0
157 assert level > 0
148 candidates = ['.'.join(modulename.split('.')[:-level])]
158 candidates = ['.'.join(modulename.split('.')[:-level])]
149 else:
159 else:
150 if not level:
160 if not level:
151 # Check relative name first.
161 # Check relative name first.
152 candidates = [prefix + name, name]
162 candidates = [prefix + name, name]
153 else:
163 else:
154 candidates = ['.'.join(modulename.split('.')[:-level]) +
164 candidates = ['.'.join(modulename.split('.')[:-level]) +
155 '.' + name]
165 '.' + name]
156
166
157 for n in candidates:
167 for n in candidates:
158 if n in localmods:
168 if n in localmods:
159 return (n, n, False)
169 return (n, n, False)
160 dottedpath = n + '.__init__'
170 dottedpath = n + '.__init__'
161 if dottedpath in localmods:
171 if dottedpath in localmods:
162 return (n, dottedpath, True)
172 return (n, dottedpath, True)
163 return False
173 return False
164 return fromlocal
174 return fromlocal
165
175
166 def list_stdlib_modules():
176 def list_stdlib_modules():
167 """List the modules present in the stdlib.
177 """List the modules present in the stdlib.
168
178
169 >>> mods = set(list_stdlib_modules())
179 >>> mods = set(list_stdlib_modules())
170 >>> 'BaseHTTPServer' in mods
180 >>> 'BaseHTTPServer' in mods
171 True
181 True
172
182
173 os.path isn't really a module, so it's missing:
183 os.path isn't really a module, so it's missing:
174
184
175 >>> 'os.path' in mods
185 >>> 'os.path' in mods
176 False
186 False
177
187
178 sys requires special treatment, because it's baked into the
188 sys requires special treatment, because it's baked into the
179 interpreter, but it should still appear:
189 interpreter, but it should still appear:
180
190
181 >>> 'sys' in mods
191 >>> 'sys' in mods
182 True
192 True
183
193
184 >>> 'collections' in mods
194 >>> 'collections' in mods
185 True
195 True
186
196
187 >>> 'cStringIO' in mods
197 >>> 'cStringIO' in mods
188 True
198 True
189
199
190 >>> 'cffi' in mods
200 >>> 'cffi' in mods
191 True
201 True
192 """
202 """
193 for m in sys.builtin_module_names:
203 for m in sys.builtin_module_names:
194 yield m
204 yield m
195 # These modules only exist on windows, but we should always
205 # These modules only exist on windows, but we should always
196 # consider them stdlib.
206 # consider them stdlib.
197 for m in ['msvcrt', '_winreg']:
207 for m in ['msvcrt', '_winreg']:
198 yield m
208 yield m
199 yield 'builtins' # python3 only
209 yield 'builtins' # python3 only
200 for m in 'fcntl', 'grp', 'pwd', 'termios': # Unix only
210 for m in 'fcntl', 'grp', 'pwd', 'termios': # Unix only
201 yield m
211 yield m
202 for m in 'cPickle', 'datetime': # in Python (not C) on PyPy
212 for m in 'cPickle', 'datetime': # in Python (not C) on PyPy
203 yield m
213 yield m
204 for m in ['cffi']:
214 for m in ['cffi']:
205 yield m
215 yield m
206 stdlib_prefixes = set([sys.prefix, sys.exec_prefix])
216 stdlib_prefixes = set([sys.prefix, sys.exec_prefix])
207 # We need to supplement the list of prefixes for the search to work
217 # We need to supplement the list of prefixes for the search to work
208 # when run from within a virtualenv.
218 # when run from within a virtualenv.
209 for mod in (BaseHTTPServer, zlib):
219 for mod in (BaseHTTPServer, zlib):
210 try:
220 try:
211 # Not all module objects have a __file__ attribute.
221 # Not all module objects have a __file__ attribute.
212 filename = mod.__file__
222 filename = mod.__file__
213 except AttributeError:
223 except AttributeError:
214 continue
224 continue
215 dirname = os.path.dirname(filename)
225 dirname = os.path.dirname(filename)
216 for prefix in stdlib_prefixes:
226 for prefix in stdlib_prefixes:
217 if dirname.startswith(prefix):
227 if dirname.startswith(prefix):
218 # Then this directory is redundant.
228 # Then this directory is redundant.
219 break
229 break
220 else:
230 else:
221 stdlib_prefixes.add(dirname)
231 stdlib_prefixes.add(dirname)
222 for libpath in sys.path:
232 for libpath in sys.path:
223 # We want to walk everything in sys.path that starts with
233 # We want to walk everything in sys.path that starts with
224 # something in stdlib_prefixes.
234 # something in stdlib_prefixes.
225 if not any(libpath.startswith(p) for p in stdlib_prefixes):
235 if not any(libpath.startswith(p) for p in stdlib_prefixes):
226 continue
236 continue
227 for top, dirs, files in os.walk(libpath):
237 for top, dirs, files in os.walk(libpath):
228 for i, d in reversed(list(enumerate(dirs))):
238 for i, d in reversed(list(enumerate(dirs))):
229 if (not os.path.exists(os.path.join(top, d, '__init__.py'))
239 if (not os.path.exists(os.path.join(top, d, '__init__.py'))
230 or top == libpath and d in ('hgext', 'mercurial')):
240 or top == libpath and d in ('hgext', 'mercurial')):
231 del dirs[i]
241 del dirs[i]
232 for name in files:
242 for name in files:
233 if not name.endswith(('.py', '.so', '.pyc', '.pyo', '.pyd')):
243 if not name.endswith(('.py', '.so', '.pyc', '.pyo', '.pyd')):
234 continue
244 continue
235 if name.startswith('__init__.py'):
245 if name.startswith('__init__.py'):
236 full_path = top
246 full_path = top
237 else:
247 else:
238 full_path = os.path.join(top, name)
248 full_path = os.path.join(top, name)
239 rel_path = full_path[len(libpath) + 1:]
249 rel_path = full_path[len(libpath) + 1:]
240 mod = dotted_name_of_path(rel_path)
250 mod = dotted_name_of_path(rel_path)
241 yield mod
251 yield mod
242
252
243 stdlib_modules = set(list_stdlib_modules())
253 stdlib_modules = set(list_stdlib_modules())
244
254
245 def imported_modules(source, modulename, f, localmods, ignore_nested=False):
255 def imported_modules(source, modulename, f, localmods, ignore_nested=False):
246 """Given the source of a file as a string, yield the names
256 """Given the source of a file as a string, yield the names
247 imported by that file.
257 imported by that file.
248
258
249 Args:
259 Args:
250 source: The python source to examine as a string.
260 source: The python source to examine as a string.
251 modulename: of specified python source (may have `__init__`)
261 modulename: of specified python source (may have `__init__`)
252 localmods: dict of locally defined module names (may have `__init__`)
262 localmods: dict of locally defined module names (may have `__init__`)
253 ignore_nested: If true, import statements that do not start in
263 ignore_nested: If true, import statements that do not start in
254 column zero will be ignored.
264 column zero will be ignored.
255
265
256 Returns:
266 Returns:
257 A list of absolute module names imported by the given source.
267 A list of absolute module names imported by the given source.
258
268
259 >>> f = 'foo/xxx.py'
269 >>> f = 'foo/xxx.py'
260 >>> modulename = 'foo.xxx'
270 >>> modulename = 'foo.xxx'
261 >>> localmods = {'foo.__init__': True,
271 >>> localmods = {'foo.__init__': True,
262 ... 'foo.foo1': True, 'foo.foo2': True,
272 ... 'foo.foo1': True, 'foo.foo2': True,
263 ... 'foo.bar.__init__': True, 'foo.bar.bar1': True,
273 ... 'foo.bar.__init__': True, 'foo.bar.bar1': True,
264 ... 'baz.__init__': True, 'baz.baz1': True }
274 ... 'baz.__init__': True, 'baz.baz1': True }
265 >>> # standard library (= not locally defined ones)
275 >>> # standard library (= not locally defined ones)
266 >>> sorted(imported_modules(
276 >>> sorted(imported_modules(
267 ... 'from stdlib1 import foo, bar; import stdlib2',
277 ... 'from stdlib1 import foo, bar; import stdlib2',
268 ... modulename, f, localmods))
278 ... modulename, f, localmods))
269 []
279 []
270 >>> # relative importing
280 >>> # relative importing
271 >>> sorted(imported_modules(
281 >>> sorted(imported_modules(
272 ... 'import foo1; from bar import bar1',
282 ... 'import foo1; from bar import bar1',
273 ... modulename, f, localmods))
283 ... modulename, f, localmods))
274 ['foo.bar.bar1', 'foo.foo1']
284 ['foo.bar.bar1', 'foo.foo1']
275 >>> sorted(imported_modules(
285 >>> sorted(imported_modules(
276 ... 'from bar.bar1 import name1, name2, name3',
286 ... 'from bar.bar1 import name1, name2, name3',
277 ... modulename, f, localmods))
287 ... modulename, f, localmods))
278 ['foo.bar.bar1']
288 ['foo.bar.bar1']
279 >>> # absolute importing
289 >>> # absolute importing
280 >>> sorted(imported_modules(
290 >>> sorted(imported_modules(
281 ... 'from baz import baz1, name1',
291 ... 'from baz import baz1, name1',
282 ... modulename, f, localmods))
292 ... modulename, f, localmods))
283 ['baz.__init__', 'baz.baz1']
293 ['baz.__init__', 'baz.baz1']
284 >>> # mixed importing, even though it shouldn't be recommended
294 >>> # mixed importing, even though it shouldn't be recommended
285 >>> sorted(imported_modules(
295 >>> sorted(imported_modules(
286 ... 'import stdlib, foo1, baz',
296 ... 'import stdlib, foo1, baz',
287 ... modulename, f, localmods))
297 ... modulename, f, localmods))
288 ['baz.__init__', 'foo.foo1']
298 ['baz.__init__', 'foo.foo1']
289 >>> # ignore_nested
299 >>> # ignore_nested
290 >>> sorted(imported_modules(
300 >>> sorted(imported_modules(
291 ... '''import foo
301 ... '''import foo
292 ... def wat():
302 ... def wat():
293 ... import bar
303 ... import bar
294 ... ''', modulename, f, localmods))
304 ... ''', modulename, f, localmods))
295 ['foo.__init__', 'foo.bar.__init__']
305 ['foo.__init__', 'foo.bar.__init__']
296 >>> sorted(imported_modules(
306 >>> sorted(imported_modules(
297 ... '''import foo
307 ... '''import foo
298 ... def wat():
308 ... def wat():
299 ... import bar
309 ... import bar
300 ... ''', modulename, f, localmods, ignore_nested=True))
310 ... ''', modulename, f, localmods, ignore_nested=True))
301 ['foo.__init__']
311 ['foo.__init__']
302 """
312 """
303 fromlocal = fromlocalfunc(modulename, localmods)
313 fromlocal = fromlocalfunc(modulename, localmods)
304 for node in ast.walk(ast.parse(source, f)):
314 for node in ast.walk(ast.parse(source, f)):
305 if ignore_nested and getattr(node, 'col_offset', 0) > 0:
315 if ignore_nested and getattr(node, 'col_offset', 0) > 0:
306 continue
316 continue
307 if isinstance(node, ast.Import):
317 if isinstance(node, ast.Import):
308 for n in node.names:
318 for n in node.names:
309 found = fromlocal(n.name)
319 found = fromlocal(n.name)
310 if not found:
320 if not found:
311 # this should import standard library
321 # this should import standard library
312 continue
322 continue
313 yield found[1]
323 yield found[1]
314 elif isinstance(node, ast.ImportFrom):
324 elif isinstance(node, ast.ImportFrom):
315 found = fromlocal(node.module, node.level)
325 found = fromlocal(node.module, node.level)
316 if not found:
326 if not found:
317 # this should import standard library
327 # this should import standard library
318 continue
328 continue
319
329
320 absname, dottedpath, hassubmod = found
330 absname, dottedpath, hassubmod = found
321 if not hassubmod:
331 if not hassubmod:
322 # "dottedpath" is not a package; must be imported
332 # "dottedpath" is not a package; must be imported
323 yield dottedpath
333 yield dottedpath
324 # examination of "node.names" should be redundant
334 # examination of "node.names" should be redundant
325 # e.g.: from mercurial.node import nullid, nullrev
335 # e.g.: from mercurial.node import nullid, nullrev
326 continue
336 continue
327
337
328 modnotfound = False
338 modnotfound = False
329 prefix = absname + '.'
339 prefix = absname + '.'
330 for n in node.names:
340 for n in node.names:
331 found = fromlocal(prefix + n.name)
341 found = fromlocal(prefix + n.name)
332 if not found:
342 if not found:
333 # this should be a function or a property of "node.module"
343 # this should be a function or a property of "node.module"
334 modnotfound = True
344 modnotfound = True
335 continue
345 continue
336 yield found[1]
346 yield found[1]
337 if modnotfound:
347 if modnotfound:
338 # "dottedpath" is a package, but imported because of non-module
348 # "dottedpath" is a package, but imported because of non-module
339 # lookup
349 # lookup
340 yield dottedpath
350 yield dottedpath
341
351
342 def verify_import_convention(module, source, localmods):
352 def verify_import_convention(module, source, localmods):
343 """Verify imports match our established coding convention.
353 """Verify imports match our established coding convention.
344
354
345 We have 2 conventions: legacy and modern. The modern convention is in
355 We have 2 conventions: legacy and modern. The modern convention is in
346 effect when using absolute imports.
356 effect when using absolute imports.
347
357
348 The legacy convention only looks for mixed imports. The modern convention
358 The legacy convention only looks for mixed imports. The modern convention
349 is much more thorough.
359 is much more thorough.
350 """
360 """
351 root = ast.parse(source)
361 root = ast.parse(source)
352 absolute = usingabsolute(root)
362 absolute = usingabsolute(root)
353
363
354 if absolute:
364 if absolute:
355 return verify_modern_convention(module, root, localmods)
365 return verify_modern_convention(module, root, localmods)
356 else:
366 else:
357 return verify_stdlib_on_own_line(root)
367 return verify_stdlib_on_own_line(root)
358
368
359 def verify_modern_convention(module, root, localmods, root_col_offset=0):
369 def verify_modern_convention(module, root, localmods, root_col_offset=0):
360 """Verify a file conforms to the modern import convention rules.
370 """Verify a file conforms to the modern import convention rules.
361
371
362 The rules of the modern convention are:
372 The rules of the modern convention are:
363
373
364 * Ordering is stdlib followed by local imports. Each group is lexically
374 * Ordering is stdlib followed by local imports. Each group is lexically
365 sorted.
375 sorted.
366 * Importing multiple modules via "import X, Y" is not allowed: use
376 * Importing multiple modules via "import X, Y" is not allowed: use
367 separate import statements.
377 separate import statements.
368 * Importing multiple modules via "from X import ..." is allowed if using
378 * Importing multiple modules via "from X import ..." is allowed if using
369 parenthesis and one entry per line.
379 parenthesis and one entry per line.
370 * Only 1 relative import statement per import level ("from .", "from ..")
380 * Only 1 relative import statement per import level ("from .", "from ..")
371 is allowed.
381 is allowed.
372 * Relative imports from higher levels must occur before lower levels. e.g.
382 * Relative imports from higher levels must occur before lower levels. e.g.
373 "from .." must be before "from .".
383 "from .." must be before "from .".
374 * Imports from peer packages should use relative import (e.g. do not
384 * Imports from peer packages should use relative import (e.g. do not
375 "import mercurial.foo" from a "mercurial.*" module).
385 "import mercurial.foo" from a "mercurial.*" module).
376 * Symbols can only be imported from specific modules (see
386 * Symbols can only be imported from specific modules (see
377 `allowsymbolimports`). For other modules, first import the module then
387 `allowsymbolimports`). For other modules, first import the module then
378 assign the symbol to a module-level variable. In addition, these imports
388 assign the symbol to a module-level variable. In addition, these imports
379 must be performed before other local imports. This rule only
389 must be performed before other local imports. This rule only
380 applies to import statements outside of any blocks.
390 applies to import statements outside of any blocks.
381 * Relative imports from the standard library are not allowed.
391 * Relative imports from the standard library are not allowed.
382 * Certain modules must be aliased to alternate names to avoid aliasing
392 * Certain modules must be aliased to alternate names to avoid aliasing
383 and readability problems. See `requirealias`.
393 and readability problems. See `requirealias`.
384 """
394 """
385 topmodule = module.split('.')[0]
395 topmodule = module.split('.')[0]
386 fromlocal = fromlocalfunc(module, localmods)
396 fromlocal = fromlocalfunc(module, localmods)
387
397
388 # Whether a local/non-stdlib import has been performed.
398 # Whether a local/non-stdlib import has been performed.
389 seenlocal = None
399 seenlocal = None
390 # Whether a local/non-stdlib, non-symbol import has been seen.
400 # Whether a local/non-stdlib, non-symbol import has been seen.
391 seennonsymbollocal = False
401 seennonsymbollocal = False
392 # The last name to be imported (for sorting).
402 # The last name to be imported (for sorting).
393 lastname = None
403 lastname = None
394 laststdlib = None
404 laststdlib = None
395 # Relative import levels encountered so far.
405 # Relative import levels encountered so far.
396 seenlevels = set()
406 seenlevels = set()
397
407
398 for node, newscope in walklocal(root):
408 for node, newscope in walklocal(root):
399 def msg(fmt, *args):
409 def msg(fmt, *args):
400 return (fmt % args, node.lineno)
410 return (fmt % args, node.lineno)
401 if newscope:
411 if newscope:
402 # Check for local imports in function
412 # Check for local imports in function
403 for r in verify_modern_convention(module, node, localmods,
413 for r in verify_modern_convention(module, node, localmods,
404 node.col_offset + 4):
414 node.col_offset + 4):
405 yield r
415 yield r
406 elif isinstance(node, ast.Import):
416 elif isinstance(node, ast.Import):
407 # Disallow "import foo, bar" and require separate imports
417 # Disallow "import foo, bar" and require separate imports
408 # for each module.
418 # for each module.
409 if len(node.names) > 1:
419 if len(node.names) > 1:
410 yield msg('multiple imported names: %s',
420 yield msg('multiple imported names: %s',
411 ', '.join(n.name for n in node.names))
421 ', '.join(n.name for n in node.names))
412
422
413 name = node.names[0].name
423 name = node.names[0].name
414 asname = node.names[0].asname
424 asname = node.names[0].asname
415
425
416 stdlib = name in stdlib_modules
426 stdlib = name in stdlib_modules
417
427
418 # Ignore sorting rules on imports inside blocks.
428 # Ignore sorting rules on imports inside blocks.
419 if node.col_offset == root_col_offset:
429 if node.col_offset == root_col_offset:
420 if lastname and name < lastname and laststdlib == stdlib:
430 if lastname and name < lastname and laststdlib == stdlib:
421 yield msg('imports not lexically sorted: %s < %s',
431 yield msg('imports not lexically sorted: %s < %s',
422 name, lastname)
432 name, lastname)
423
433
424 lastname = name
434 lastname = name
425 laststdlib = stdlib
435 laststdlib = stdlib
426
436
427 # stdlib imports should be before local imports.
437 # stdlib imports should be before local imports.
428 if stdlib and seenlocal and node.col_offset == root_col_offset:
438 if stdlib and seenlocal and node.col_offset == root_col_offset:
429 yield msg('stdlib import "%s" follows local import: %s',
439 yield msg('stdlib import "%s" follows local import: %s',
430 name, seenlocal)
440 name, seenlocal)
431
441
432 if not stdlib:
442 if not stdlib:
433 seenlocal = name
443 seenlocal = name
434
444
435 # Import of sibling modules should use relative imports.
445 # Import of sibling modules should use relative imports.
436 topname = name.split('.')[0]
446 topname = name.split('.')[0]
437 if topname == topmodule:
447 if topname == topmodule:
438 yield msg('import should be relative: %s', name)
448 yield msg('import should be relative: %s', name)
439
449
440 if name in requirealias and asname != requirealias[name]:
450 if name in requirealias and asname != requirealias[name]:
441 yield msg('%s module must be "as" aliased to %s',
451 yield msg('%s module must be "as" aliased to %s',
442 name, requirealias[name])
452 name, requirealias[name])
443
453
444 elif isinstance(node, ast.ImportFrom):
454 elif isinstance(node, ast.ImportFrom):
445 # Resolve the full imported module name.
455 # Resolve the full imported module name.
446 if node.level > 0:
456 if node.level > 0:
447 fullname = '.'.join(module.split('.')[:-node.level])
457 fullname = '.'.join(module.split('.')[:-node.level])
448 if node.module:
458 if node.module:
449 fullname += '.%s' % node.module
459 fullname += '.%s' % node.module
450 else:
460 else:
451 assert node.module
461 assert node.module
452 fullname = node.module
462 fullname = node.module
453
463
454 topname = fullname.split('.')[0]
464 topname = fullname.split('.')[0]
455 if topname == topmodule:
465 if topname == topmodule:
456 yield msg('import should be relative: %s', fullname)
466 yield msg('import should be relative: %s', fullname)
457
467
458 # __future__ is special since it needs to come first and use
468 # __future__ is special since it needs to come first and use
459 # symbol import.
469 # symbol import.
460 if fullname != '__future__':
470 if fullname != '__future__':
461 if not fullname or fullname in stdlib_modules:
471 if not fullname or fullname in stdlib_modules:
462 yield msg('relative import of stdlib module')
472 yield msg('relative import of stdlib module')
463 else:
473 else:
464 seenlocal = fullname
474 seenlocal = fullname
465
475
466 # Direct symbol import is only allowed from certain modules and
476 # Direct symbol import is only allowed from certain modules and
467 # must occur before non-symbol imports.
477 # must occur before non-symbol imports.
468 found = fromlocal(node.module, node.level)
478 found = fromlocal(node.module, node.level)
469 if found and found[2]: # node.module is a package
479 if found and found[2]: # node.module is a package
470 prefix = found[0] + '.'
480 prefix = found[0] + '.'
471 symbols = [n.name for n in node.names
481 symbols = [n.name for n in node.names
472 if not fromlocal(prefix + n.name)]
482 if not fromlocal(prefix + n.name)]
473 else:
483 else:
474 symbols = [n.name for n in node.names]
484 symbols = [n.name for n in node.names]
475 if node.module and node.col_offset == root_col_offset:
485 if node.module and node.col_offset == root_col_offset:
476 if symbols and fullname not in allowsymbolimports:
486 if symbols and fullname not in allowsymbolimports:
477 yield msg('direct symbol import %s from %s',
487 yield msg('direct symbol import %s from %s',
478 ', '.join(symbols), fullname)
488 ', '.join(symbols), fullname)
479
489
480 if symbols and seennonsymbollocal:
490 if symbols and seennonsymbollocal:
481 yield msg('symbol import follows non-symbol import: %s',
491 yield msg('symbol import follows non-symbol import: %s',
482 fullname)
492 fullname)
483 if not symbols and fullname not in stdlib_modules:
493 if not symbols and fullname not in stdlib_modules:
484 seennonsymbollocal = True
494 seennonsymbollocal = True
485
495
486 if not node.module:
496 if not node.module:
487 assert node.level
497 assert node.level
488
498
489 # Only allow 1 group per level.
499 # Only allow 1 group per level.
490 if (node.level in seenlevels
500 if (node.level in seenlevels
491 and node.col_offset == root_col_offset):
501 and node.col_offset == root_col_offset):
492 yield msg('multiple "from %s import" statements',
502 yield msg('multiple "from %s import" statements',
493 '.' * node.level)
503 '.' * node.level)
494
504
495 # Higher-level groups come before lower-level groups.
505 # Higher-level groups come before lower-level groups.
496 if any(node.level > l for l in seenlevels):
506 if any(node.level > l for l in seenlevels):
497 yield msg('higher-level import should come first: %s',
507 yield msg('higher-level import should come first: %s',
498 fullname)
508 fullname)
499
509
500 seenlevels.add(node.level)
510 seenlevels.add(node.level)
501
511
502 # Entries in "from .X import ( ... )" lists must be lexically
512 # Entries in "from .X import ( ... )" lists must be lexically
503 # sorted.
513 # sorted.
504 lastentryname = None
514 lastentryname = None
505
515
506 for n in node.names:
516 for n in node.names:
507 if lastentryname and n.name < lastentryname:
517 if lastentryname and n.name < lastentryname:
508 yield msg('imports from %s not lexically sorted: %s < %s',
518 yield msg('imports from %s not lexically sorted: %s < %s',
509 fullname, n.name, lastentryname)
519 fullname, n.name, lastentryname)
510
520
511 lastentryname = n.name
521 lastentryname = n.name
512
522
513 if n.name in requirealias and n.asname != requirealias[n.name]:
523 if n.name in requirealias and n.asname != requirealias[n.name]:
514 yield msg('%s from %s must be "as" aliased to %s',
524 yield msg('%s from %s must be "as" aliased to %s',
515 n.name, fullname, requirealias[n.name])
525 n.name, fullname, requirealias[n.name])
516
526
517 def verify_stdlib_on_own_line(root):
527 def verify_stdlib_on_own_line(root):
518 """Given some python source, verify that stdlib imports are done
528 """Given some python source, verify that stdlib imports are done
519 in separate statements from relative local module imports.
529 in separate statements from relative local module imports.
520
530
521 >>> list(verify_stdlib_on_own_line(ast.parse('import sys, foo')))
531 >>> list(verify_stdlib_on_own_line(ast.parse('import sys, foo')))
522 [('mixed imports\\n stdlib: sys\\n relative: foo', 1)]
532 [('mixed imports\\n stdlib: sys\\n relative: foo', 1)]
523 >>> list(verify_stdlib_on_own_line(ast.parse('import sys, os')))
533 >>> list(verify_stdlib_on_own_line(ast.parse('import sys, os')))
524 []
534 []
525 >>> list(verify_stdlib_on_own_line(ast.parse('import foo, bar')))
535 >>> list(verify_stdlib_on_own_line(ast.parse('import foo, bar')))
526 []
536 []
527 """
537 """
528 for node in ast.walk(root):
538 for node in ast.walk(root):
529 if isinstance(node, ast.Import):
539 if isinstance(node, ast.Import):
530 from_stdlib = {False: [], True: []}
540 from_stdlib = {False: [], True: []}
531 for n in node.names:
541 for n in node.names:
532 from_stdlib[n.name in stdlib_modules].append(n.name)
542 from_stdlib[n.name in stdlib_modules].append(n.name)
533 if from_stdlib[True] and from_stdlib[False]:
543 if from_stdlib[True] and from_stdlib[False]:
534 yield ('mixed imports\n stdlib: %s\n relative: %s' %
544 yield ('mixed imports\n stdlib: %s\n relative: %s' %
535 (', '.join(sorted(from_stdlib[True])),
545 (', '.join(sorted(from_stdlib[True])),
536 ', '.join(sorted(from_stdlib[False]))), node.lineno)
546 ', '.join(sorted(from_stdlib[False]))), node.lineno)
537
547
538 class CircularImport(Exception):
548 class CircularImport(Exception):
539 pass
549 pass
540
550
541 def checkmod(mod, imports):
551 def checkmod(mod, imports):
542 shortest = {}
552 shortest = {}
543 visit = [[mod]]
553 visit = [[mod]]
544 while visit:
554 while visit:
545 path = visit.pop(0)
555 path = visit.pop(0)
546 for i in sorted(imports.get(path[-1], [])):
556 for i in sorted(imports.get(path[-1], [])):
547 if len(path) < shortest.get(i, 1000):
557 if len(path) < shortest.get(i, 1000):
548 shortest[i] = len(path)
558 shortest[i] = len(path)
549 if i in path:
559 if i in path:
550 if i == path[0]:
560 if i == path[0]:
551 raise CircularImport(path)
561 raise CircularImport(path)
552 continue
562 continue
553 visit.append(path + [i])
563 visit.append(path + [i])
554
564
555 def rotatecycle(cycle):
565 def rotatecycle(cycle):
556 """arrange a cycle so that the lexicographically first module listed first
566 """arrange a cycle so that the lexicographically first module listed first
557
567
558 >>> rotatecycle(['foo', 'bar'])
568 >>> rotatecycle(['foo', 'bar'])
559 ['bar', 'foo', 'bar']
569 ['bar', 'foo', 'bar']
560 """
570 """
561 lowest = min(cycle)
571 lowest = min(cycle)
562 idx = cycle.index(lowest)
572 idx = cycle.index(lowest)
563 return cycle[idx:] + cycle[:idx] + [lowest]
573 return cycle[idx:] + cycle[:idx] + [lowest]
564
574
565 def find_cycles(imports):
575 def find_cycles(imports):
566 """Find cycles in an already-loaded import graph.
576 """Find cycles in an already-loaded import graph.
567
577
568 All module names recorded in `imports` should be absolute one.
578 All module names recorded in `imports` should be absolute one.
569
579
570 >>> from __future__ import print_function
580 >>> from __future__ import print_function
571 >>> imports = {'top.foo': ['top.bar', 'os.path', 'top.qux'],
581 >>> imports = {'top.foo': ['top.bar', 'os.path', 'top.qux'],
572 ... 'top.bar': ['top.baz', 'sys'],
582 ... 'top.bar': ['top.baz', 'sys'],
573 ... 'top.baz': ['top.foo'],
583 ... 'top.baz': ['top.foo'],
574 ... 'top.qux': ['top.foo']}
584 ... 'top.qux': ['top.foo']}
575 >>> print('\\n'.join(sorted(find_cycles(imports))))
585 >>> print('\\n'.join(sorted(find_cycles(imports))))
576 top.bar -> top.baz -> top.foo -> top.bar
586 top.bar -> top.baz -> top.foo -> top.bar
577 top.foo -> top.qux -> top.foo
587 top.foo -> top.qux -> top.foo
578 """
588 """
579 cycles = set()
589 cycles = set()
580 for mod in sorted(imports.keys()):
590 for mod in sorted(imports.keys()):
581 try:
591 try:
582 checkmod(mod, imports)
592 checkmod(mod, imports)
583 except CircularImport as e:
593 except CircularImport as e:
584 cycle = e.args[0]
594 cycle = e.args[0]
585 cycles.add(" -> ".join(rotatecycle(cycle)))
595 cycles.add(" -> ".join(rotatecycle(cycle)))
586 return cycles
596 return cycles
587
597
588 def _cycle_sortkey(c):
598 def _cycle_sortkey(c):
589 return len(c), c
599 return len(c), c
590
600
591 def embedded(f, modname, src):
601 def embedded(f, modname, src):
592 """Extract embedded python code
602 """Extract embedded python code
593
603
594 >>> def test(fn, lines):
604 >>> def test(fn, lines):
595 ... for s, m, f, l in embedded(fn, "example", lines):
605 ... for s, m, f, l in embedded(fn, "example", lines):
596 ... print("%s %s %s" % (m, f, l))
606 ... print("%s %s %s" % (m, f, l))
597 ... print(repr(s))
607 ... print(repr(s))
598 >>> lines = [
608 >>> lines = [
599 ... 'comment',
609 ... 'comment',
600 ... ' >>> from __future__ import print_function',
610 ... ' >>> from __future__ import print_function',
601 ... " >>> ' multiline",
611 ... " >>> ' multiline",
602 ... " ... string'",
612 ... " ... string'",
603 ... ' ',
613 ... ' ',
604 ... 'comment',
614 ... 'comment',
605 ... ' $ cat > foo.py <<EOF',
615 ... ' $ cat > foo.py <<EOF',
606 ... ' > from __future__ import print_function',
616 ... ' > from __future__ import print_function',
607 ... ' > EOF',
617 ... ' > EOF',
608 ... ]
618 ... ]
609 >>> test("example.t", lines)
619 >>> test("example.t", lines)
610 example[2] doctest.py 2
620 example[2] doctest.py 2
611 "from __future__ import print_function\\n' multiline\\nstring'\\n"
621 "from __future__ import print_function\\n' multiline\\nstring'\\n"
612 example[7] foo.py 7
622 example[7] foo.py 7
613 'from __future__ import print_function\\n'
623 'from __future__ import print_function\\n'
614 """
624 """
615 inlinepython = 0
625 inlinepython = 0
616 shpython = 0
626 shpython = 0
617 script = []
627 script = []
618 prefix = 6
628 prefix = 6
619 t = ''
629 t = ''
620 n = 0
630 n = 0
621 for l in src:
631 for l in src:
622 n += 1
632 n += 1
623 if not l.endswith(b'\n'):
633 if not l.endswith(b'\n'):
624 l += b'\n'
634 l += b'\n'
625 if l.startswith(b' >>> '): # python inlines
635 if l.startswith(b' >>> '): # python inlines
626 if shpython:
636 if shpython:
627 print("%s:%d: Parse Error" % (f, n))
637 print("%s:%d: Parse Error" % (f, n))
628 if not inlinepython:
638 if not inlinepython:
629 # We've just entered a Python block.
639 # We've just entered a Python block.
630 inlinepython = n
640 inlinepython = n
631 t = 'doctest.py'
641 t = 'doctest.py'
632 script.append(l[prefix:])
642 script.append(l[prefix:])
633 continue
643 continue
634 if l.startswith(b' ... '): # python inlines
644 if l.startswith(b' ... '): # python inlines
635 script.append(l[prefix:])
645 script.append(l[prefix:])
636 continue
646 continue
637 cat = re.search(r"\$ \s*cat\s*>\s*(\S+\.py)\s*<<\s*EOF", l)
647 cat = re.search(r"\$ \s*cat\s*>\s*(\S+\.py)\s*<<\s*EOF", l)
638 if cat:
648 if cat:
639 if inlinepython:
649 if inlinepython:
640 yield ''.join(script), ("%s[%d]" %
650 yield ''.join(script), ("%s[%d]" %
641 (modname, inlinepython)), t, inlinepython
651 (modname, inlinepython)), t, inlinepython
642 script = []
652 script = []
643 inlinepython = 0
653 inlinepython = 0
644 shpython = n
654 shpython = n
645 t = cat.group(1)
655 t = cat.group(1)
646 continue
656 continue
647 if shpython and l.startswith(b' > '): # sh continuation
657 if shpython and l.startswith(b' > '): # sh continuation
648 if l == b' > EOF\n':
658 if l == b' > EOF\n':
649 yield ''.join(script), ("%s[%d]" %
659 yield ''.join(script), ("%s[%d]" %
650 (modname, shpython)), t, shpython
660 (modname, shpython)), t, shpython
651 script = []
661 script = []
652 shpython = 0
662 shpython = 0
653 else:
663 else:
654 script.append(l[4:])
664 script.append(l[4:])
655 continue
665 continue
656 if inlinepython and l == b' \n':
666 if inlinepython and l == b' \n':
657 yield ''.join(script), ("%s[%d]" %
667 yield ''.join(script), ("%s[%d]" %
658 (modname, inlinepython)), t, inlinepython
668 (modname, inlinepython)), t, inlinepython
659 script = []
669 script = []
660 inlinepython = 0
670 inlinepython = 0
661 continue
671 continue
662
672
663 def sources(f, modname):
673 def sources(f, modname):
664 """Yields possibly multiple sources from a filepath
674 """Yields possibly multiple sources from a filepath
665
675
666 input: filepath, modulename
676 input: filepath, modulename
667 yields: script(string), modulename, filepath, linenumber
677 yields: script(string), modulename, filepath, linenumber
668
678
669 For embedded scripts, the modulename and filepath will be different
679 For embedded scripts, the modulename and filepath will be different
670 from the function arguments. linenumber is an offset relative to
680 from the function arguments. linenumber is an offset relative to
671 the input file.
681 the input file.
672 """
682 """
673 py = False
683 py = False
674 if not f.endswith('.t'):
684 if not f.endswith('.t'):
675 with open(f) as src:
685 with open(f) as src:
676 yield src.read(), modname, f, 0
686 yield src.read(), modname, f, 0
677 py = True
687 py = True
678 if py or f.endswith('.t'):
688 if py or f.endswith('.t'):
679 with open(f) as src:
689 with open(f) as src:
680 for script, modname, t, line in embedded(f, modname, src):
690 for script, modname, t, line in embedded(f, modname, src):
681 yield script, modname, t, line
691 yield script, modname, t, line
682
692
683 def main(argv):
693 def main(argv):
684 if len(argv) < 2 or (argv[1] == '-' and len(argv) > 2):
694 if len(argv) < 2 or (argv[1] == '-' and len(argv) > 2):
685 print('Usage: %s {-|file [file] [file] ...}')
695 print('Usage: %s {-|file [file] [file] ...}')
686 return 1
696 return 1
687 if argv[1] == '-':
697 if argv[1] == '-':
688 argv = argv[:1]
698 argv = argv[:1]
689 argv.extend(l.rstrip() for l in sys.stdin.readlines())
699 argv.extend(l.rstrip() for l in sys.stdin.readlines())
690 localmods = {}
700 localmods = {}
691 used_imports = {}
701 used_imports = {}
692 any_errors = False
702 any_errors = False
693 for source_path in argv[1:]:
703 for source_path in argv[1:]:
694 modname = dotted_name_of_path(source_path, trimpure=True)
704 trimpure = source_path.endswith(_dualmodules)
705 modname = dotted_name_of_path(source_path, trimpure=trimpure)
695 localmods[modname] = source_path
706 localmods[modname] = source_path
696 for localmodname, source_path in sorted(localmods.items()):
707 for localmodname, source_path in sorted(localmods.items()):
697 for src, modname, name, line in sources(source_path, localmodname):
708 for src, modname, name, line in sources(source_path, localmodname):
698 try:
709 try:
699 used_imports[modname] = sorted(
710 used_imports[modname] = sorted(
700 imported_modules(src, modname, name, localmods,
711 imported_modules(src, modname, name, localmods,
701 ignore_nested=True))
712 ignore_nested=True))
702 for error, lineno in verify_import_convention(modname, src,
713 for error, lineno in verify_import_convention(modname, src,
703 localmods):
714 localmods):
704 any_errors = True
715 any_errors = True
705 print('%s:%d: %s' % (source_path, lineno + line, error))
716 print('%s:%d: %s' % (source_path, lineno + line, error))
706 except SyntaxError as e:
717 except SyntaxError as e:
707 print('%s:%d: SyntaxError: %s' %
718 print('%s:%d: SyntaxError: %s' %
708 (source_path, e.lineno + line, e))
719 (source_path, e.lineno + line, e))
709 cycles = find_cycles(used_imports)
720 cycles = find_cycles(used_imports)
710 if cycles:
721 if cycles:
711 firstmods = set()
722 firstmods = set()
712 for c in sorted(cycles, key=_cycle_sortkey):
723 for c in sorted(cycles, key=_cycle_sortkey):
713 first = c.split()[0]
724 first = c.split()[0]
714 # As a rough cut, ignore any cycle that starts with the
725 # As a rough cut, ignore any cycle that starts with the
715 # same module as some other cycle. Otherwise we see lots
726 # same module as some other cycle. Otherwise we see lots
716 # of cycles that are effectively duplicates.
727 # of cycles that are effectively duplicates.
717 if first in firstmods:
728 if first in firstmods:
718 continue
729 continue
719 print('Import cycle:', c)
730 print('Import cycle:', c)
720 firstmods.add(first)
731 firstmods.add(first)
721 any_errors = True
732 any_errors = True
722 return any_errors != 0
733 return any_errors != 0
723
734
724 if __name__ == '__main__':
735 if __name__ == '__main__':
725 sys.exit(int(main(sys.argv)))
736 sys.exit(int(main(sys.argv)))
@@ -1,403 +1,404 b''
1 # __init__.py - Startup and module loading logic for Mercurial.
1 # __init__.py - Startup and module loading logic for Mercurial.
2 #
2 #
3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import imp
10 import imp
11 import os
11 import os
12 import sys
12 import sys
13 import zipimport
13 import zipimport
14
14
15 from . import (
15 from . import (
16 policy
16 policy
17 )
17 )
18
18
19 __all__ = []
19 __all__ = []
20
20
21 modulepolicy = policy.policy
21 modulepolicy = policy.policy
22
22
23 # Modules that have both Python and C implementations. See also the
23 # Modules that have both Python and C implementations. See also the
24 # set of .py files under mercurial/pure/.
24 # set of .py files under mercurial/pure/.
25 _dualmodules = set([
25 _dualmodules = set([
26 'mercurial.base85',
26 'mercurial.base85',
27 'mercurial.bdiff',
27 'mercurial.bdiff',
28 'mercurial.diffhelpers',
28 'mercurial.diffhelpers',
29 'mercurial.mpatch',
29 'mercurial.mpatch',
30 'mercurial.osutil',
30 'mercurial.osutil',
31 'mercurial.parsers',
31 'mercurial.parsers',
32 ])
32 ])
33
33
34 class hgimporter(object):
34 class hgimporter(object):
35 """Object that conforms to import hook interface defined in PEP-302."""
35 """Object that conforms to import hook interface defined in PEP-302."""
36 def find_module(self, name, path=None):
36 def find_module(self, name, path=None):
37 # We only care about modules that have both C and pure implementations.
37 # We only care about modules that have both C and pure implementations.
38 if name in _dualmodules:
38 if name in _dualmodules:
39 return self
39 return self
40 return None
40 return None
41
41
42 def load_module(self, name):
42 def load_module(self, name):
43 mod = sys.modules.get(name, None)
43 mod = sys.modules.get(name, None)
44 if mod:
44 if mod:
45 return mod
45 return mod
46
46
47 mercurial = sys.modules['mercurial']
47 mercurial = sys.modules['mercurial']
48
48
49 # The zip importer behaves sufficiently differently from the default
49 # The zip importer behaves sufficiently differently from the default
50 # importer to warrant its own code path.
50 # importer to warrant its own code path.
51 loader = getattr(mercurial, '__loader__', None)
51 loader = getattr(mercurial, '__loader__', None)
52 if isinstance(loader, zipimport.zipimporter):
52 if isinstance(loader, zipimport.zipimporter):
53 def ziploader(*paths):
53 def ziploader(*paths):
54 """Obtain a zipimporter for a directory under the main zip."""
54 """Obtain a zipimporter for a directory under the main zip."""
55 path = os.path.join(loader.archive, *paths)
55 path = os.path.join(loader.archive, *paths)
56 zl = sys.path_importer_cache.get(path)
56 zl = sys.path_importer_cache.get(path)
57 if not zl:
57 if not zl:
58 zl = zipimport.zipimporter(path)
58 zl = zipimport.zipimporter(path)
59 return zl
59 return zl
60
60
61 try:
61 try:
62 if modulepolicy in policy.policynoc:
62 if modulepolicy in policy.policynoc:
63 raise ImportError()
63 raise ImportError()
64
64
65 zl = ziploader('mercurial')
65 zl = ziploader('mercurial')
66 mod = zl.load_module(name)
66 mod = zl.load_module(name)
67 # Unlike imp, ziploader doesn't expose module metadata that
67 # Unlike imp, ziploader doesn't expose module metadata that
68 # indicates the type of module. So just assume what we found
68 # indicates the type of module. So just assume what we found
69 # is OK (even though it could be a pure Python module).
69 # is OK (even though it could be a pure Python module).
70 except ImportError:
70 except ImportError:
71 if modulepolicy == b'c':
71 if modulepolicy == b'c':
72 raise
72 raise
73 zl = ziploader('mercurial', 'pure')
73 zl = ziploader('mercurial', 'pure')
74 mod = zl.load_module(name)
74 mod = zl.load_module(name)
75
75
76 sys.modules[name] = mod
76 sys.modules[name] = mod
77 return mod
77 return mod
78
78
79 # Unlike the default importer which searches special locations and
79 # Unlike the default importer which searches special locations and
80 # sys.path, we only look in the directory where "mercurial" was
80 # sys.path, we only look in the directory where "mercurial" was
81 # imported from.
81 # imported from.
82
82
83 # imp.find_module doesn't support submodules (modules with ".").
83 # imp.find_module doesn't support submodules (modules with ".").
84 # Instead you have to pass the parent package's __path__ attribute
84 # Instead you have to pass the parent package's __path__ attribute
85 # as the path argument.
85 # as the path argument.
86 stem = name.split('.')[-1]
86 stem = name.split('.')[-1]
87
87
88 try:
88 try:
89 if modulepolicy in policy.policynoc:
89 if modulepolicy in policy.policynoc:
90 raise ImportError()
90 raise ImportError()
91
91
92 modinfo = imp.find_module(stem, mercurial.__path__)
92 modinfo = imp.find_module(stem, mercurial.__path__)
93
93
94 # The Mercurial installer used to copy files from
94 # The Mercurial installer used to copy files from
95 # mercurial/pure/*.py to mercurial/*.py. Therefore, it's possible
95 # mercurial/pure/*.py to mercurial/*.py. Therefore, it's possible
96 # for some installations to have .py files under mercurial/*.
96 # for some installations to have .py files under mercurial/*.
97 # Loading Python modules when we expected C versions could result
97 # Loading Python modules when we expected C versions could result
98 # in a) poor performance b) loading a version from a previous
98 # in a) poor performance b) loading a version from a previous
99 # Mercurial version, potentially leading to incompatibility. Either
99 # Mercurial version, potentially leading to incompatibility. Either
100 # scenario is bad. So we verify that modules loaded from
100 # scenario is bad. So we verify that modules loaded from
101 # mercurial/* are C extensions. If the current policy allows the
101 # mercurial/* are C extensions. If the current policy allows the
102 # loading of .py modules, the module will be re-imported from
102 # loading of .py modules, the module will be re-imported from
103 # mercurial/pure/* below.
103 # mercurial/pure/* below.
104 if modinfo[2][2] != imp.C_EXTENSION:
104 if modinfo[2][2] != imp.C_EXTENSION:
105 raise ImportError('.py version of %s found where C '
105 raise ImportError('.py version of %s found where C '
106 'version should exist' % name)
106 'version should exist' % name)
107
107
108 except ImportError:
108 except ImportError:
109 if modulepolicy == b'c':
109 if modulepolicy == b'c':
110 raise
110 raise
111
111
112 # Could not load the C extension and pure Python is allowed. So
112 # Could not load the C extension and pure Python is allowed. So
113 # try to load them.
113 # try to load them.
114 from . import pure
114 from . import pure
115 modinfo = imp.find_module(stem, pure.__path__)
115 modinfo = imp.find_module(stem, pure.__path__)
116 if not modinfo:
116 if not modinfo:
117 raise ImportError('could not find mercurial module %s' %
117 raise ImportError('could not find mercurial module %s' %
118 name)
118 name)
119
119
120 mod = imp.load_module(name, *modinfo)
120 mod = imp.load_module(name, *modinfo)
121 sys.modules[name] = mod
121 sys.modules[name] = mod
122 return mod
122 return mod
123
123
124 # Python 3 uses a custom module loader that transforms source code between
124 # Python 3 uses a custom module loader that transforms source code between
125 # source file reading and compilation. This is done by registering a custom
125 # source file reading and compilation. This is done by registering a custom
126 # finder that changes the spec for Mercurial modules to use a custom loader.
126 # finder that changes the spec for Mercurial modules to use a custom loader.
127 if sys.version_info[0] >= 3:
127 if sys.version_info[0] >= 3:
128 from . import pure
128 from . import pure
129 import importlib
129 import importlib
130 import io
130 import io
131 import token
131 import token
132 import tokenize
132 import tokenize
133
133
134 class hgpathentryfinder(importlib.abc.MetaPathFinder):
134 class hgpathentryfinder(importlib.abc.MetaPathFinder):
135 """A sys.meta_path finder that uses a custom module loader."""
135 """A sys.meta_path finder that uses a custom module loader."""
136 def find_spec(self, fullname, path, target=None):
136 def find_spec(self, fullname, path, target=None):
137 # Only handle Mercurial-related modules.
137 # Only handle Mercurial-related modules.
138 if not fullname.startswith(('mercurial.', 'hgext.', 'hgext3rd.')):
138 if not fullname.startswith(('mercurial.', 'hgext.', 'hgext3rd.')):
139 return None
139 return None
140 # zstd is already dual-version clean, don't try and mangle it
140 # zstd is already dual-version clean, don't try and mangle it
141 if fullname.startswith('mercurial.zstd'):
141 if fullname.startswith('mercurial.zstd'):
142 return None
142 return None
143
143
144 # This assumes Python 3 doesn't support loading C modules.
144 # This assumes Python 3 doesn't support loading C modules.
145 if fullname in _dualmodules:
145 if fullname in _dualmodules:
146 stem = fullname.split('.')[-1]
146 stem = fullname.split('.')[-1]
147 fullname = 'mercurial.pure.%s' % stem
147 fullname = 'mercurial.pure.%s' % stem
148 target = pure
148 target = pure
149 assert len(path) == 1
149 assert len(path) == 1
150 path = [os.path.join(path[0], 'pure')]
150 path = [os.path.join(path[0], 'pure')]
151
151
152 # Try to find the module using other registered finders.
152 # Try to find the module using other registered finders.
153 spec = None
153 spec = None
154 for finder in sys.meta_path:
154 for finder in sys.meta_path:
155 if finder == self:
155 if finder == self:
156 continue
156 continue
157
157
158 spec = finder.find_spec(fullname, path, target=target)
158 spec = finder.find_spec(fullname, path, target=target)
159 if spec:
159 if spec:
160 break
160 break
161
161
162 # This is a Mercurial-related module but we couldn't find it
162 # This is a Mercurial-related module but we couldn't find it
163 # using the previously-registered finders. This likely means
163 # using the previously-registered finders. This likely means
164 # the module doesn't exist.
164 # the module doesn't exist.
165 if not spec:
165 if not spec:
166 return None
166 return None
167
167
168 if fullname.startswith('mercurial.pure.'):
168 if (fullname.startswith('mercurial.pure.')
169 and fullname.replace('.pure.', '.') in _dualmodules):
169 spec.name = spec.name.replace('.pure.', '.')
170 spec.name = spec.name.replace('.pure.', '.')
170
171
171 # TODO need to support loaders from alternate specs, like zip
172 # TODO need to support loaders from alternate specs, like zip
172 # loaders.
173 # loaders.
173 spec.loader = hgloader(spec.name, spec.origin)
174 spec.loader = hgloader(spec.name, spec.origin)
174 return spec
175 return spec
175
176
176 def replacetokens(tokens, fullname):
177 def replacetokens(tokens, fullname):
177 """Transform a stream of tokens from raw to Python 3.
178 """Transform a stream of tokens from raw to Python 3.
178
179
179 It is called by the custom module loading machinery to rewrite
180 It is called by the custom module loading machinery to rewrite
180 source/tokens between source decoding and compilation.
181 source/tokens between source decoding and compilation.
181
182
182 Returns a generator of possibly rewritten tokens.
183 Returns a generator of possibly rewritten tokens.
183
184
184 The input token list may be mutated as part of processing. However,
185 The input token list may be mutated as part of processing. However,
185 its changes do not necessarily match the output token stream.
186 its changes do not necessarily match the output token stream.
186
187
187 REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION
188 REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION
188 OR CACHED FILES WON'T GET INVALIDATED PROPERLY.
189 OR CACHED FILES WON'T GET INVALIDATED PROPERLY.
189 """
190 """
190 futureimpline = False
191 futureimpline = False
191
192
192 # The following utility functions access the tokens list and i index of
193 # The following utility functions access the tokens list and i index of
193 # the ``for i, t in enumerate(tokens)`` loop below
194 # the ``for i, t in enumerate(tokens)`` loop below
194 def _isop(j, *o):
195 def _isop(j, *o):
195 """Assert that tokens[j] is an OP with one of the given values"""
196 """Assert that tokens[j] is an OP with one of the given values"""
196 try:
197 try:
197 return tokens[j].type == token.OP and tokens[j].string in o
198 return tokens[j].type == token.OP and tokens[j].string in o
198 except IndexError:
199 except IndexError:
199 return False
200 return False
200
201
201 def _findargnofcall(n):
202 def _findargnofcall(n):
202 """Find arg n of a call expression (start at 0)
203 """Find arg n of a call expression (start at 0)
203
204
204 Returns index of the first token of that argument, or None if
205 Returns index of the first token of that argument, or None if
205 there are not that many arguments.
206 there are not that many arguments.
206
207
207 Assumes that tokens[i + 1] is '('.
208 Assumes that tokens[i + 1] is '('.
208
209
209 """
210 """
210 nested = 0
211 nested = 0
211 for j in range(i + 2, len(tokens)):
212 for j in range(i + 2, len(tokens)):
212 if _isop(j, ')', ']', '}'):
213 if _isop(j, ')', ']', '}'):
213 # end of call, tuple, subscription or dict / set
214 # end of call, tuple, subscription or dict / set
214 nested -= 1
215 nested -= 1
215 if nested < 0:
216 if nested < 0:
216 return None
217 return None
217 elif n == 0:
218 elif n == 0:
218 # this is the starting position of arg
219 # this is the starting position of arg
219 return j
220 return j
220 elif _isop(j, '(', '[', '{'):
221 elif _isop(j, '(', '[', '{'):
221 nested += 1
222 nested += 1
222 elif _isop(j, ',') and nested == 0:
223 elif _isop(j, ',') and nested == 0:
223 n -= 1
224 n -= 1
224
225
225 return None
226 return None
226
227
227 def _ensureunicode(j):
228 def _ensureunicode(j):
228 """Make sure the token at j is a unicode string
229 """Make sure the token at j is a unicode string
229
230
230 This rewrites a string token to include the unicode literal prefix
231 This rewrites a string token to include the unicode literal prefix
231 so the string transformer won't add the byte prefix.
232 so the string transformer won't add the byte prefix.
232
233
233 Ignores tokens that are not strings. Assumes bounds checking has
234 Ignores tokens that are not strings. Assumes bounds checking has
234 already been done.
235 already been done.
235
236
236 """
237 """
237 st = tokens[j]
238 st = tokens[j]
238 if st.type == token.STRING and st.string.startswith(("'", '"')):
239 if st.type == token.STRING and st.string.startswith(("'", '"')):
239 tokens[j] = st._replace(string='u%s' % st.string)
240 tokens[j] = st._replace(string='u%s' % st.string)
240
241
241 for i, t in enumerate(tokens):
242 for i, t in enumerate(tokens):
242 # Convert most string literals to byte literals. String literals
243 # Convert most string literals to byte literals. String literals
243 # in Python 2 are bytes. String literals in Python 3 are unicode.
244 # in Python 2 are bytes. String literals in Python 3 are unicode.
244 # Most strings in Mercurial are bytes and unicode strings are rare.
245 # Most strings in Mercurial are bytes and unicode strings are rare.
245 # Rather than rewrite all string literals to use ``b''`` to indicate
246 # Rather than rewrite all string literals to use ``b''`` to indicate
246 # byte strings, we apply this token transformer to insert the ``b``
247 # byte strings, we apply this token transformer to insert the ``b``
247 # prefix nearly everywhere.
248 # prefix nearly everywhere.
248 if t.type == token.STRING:
249 if t.type == token.STRING:
249 s = t.string
250 s = t.string
250
251
251 # Preserve docstrings as string literals. This is inconsistent
252 # Preserve docstrings as string literals. This is inconsistent
252 # with regular unprefixed strings. However, the
253 # with regular unprefixed strings. However, the
253 # "from __future__" parsing (which allows a module docstring to
254 # "from __future__" parsing (which allows a module docstring to
254 # exist before it) doesn't properly handle the docstring if it
255 # exist before it) doesn't properly handle the docstring if it
255 # is b''' prefixed, leading to a SyntaxError. We leave all
256 # is b''' prefixed, leading to a SyntaxError. We leave all
256 # docstrings as unprefixed to avoid this. This means Mercurial
257 # docstrings as unprefixed to avoid this. This means Mercurial
257 # components touching docstrings need to handle unicode,
258 # components touching docstrings need to handle unicode,
258 # unfortunately.
259 # unfortunately.
259 if s[0:3] in ("'''", '"""'):
260 if s[0:3] in ("'''", '"""'):
260 yield t
261 yield t
261 continue
262 continue
262
263
263 # If the first character isn't a quote, it is likely a string
264 # If the first character isn't a quote, it is likely a string
264 # prefixing character (such as 'b', 'u', or 'r'). Ignore.
265 # prefixing character (such as 'b', 'u', or 'r'). Ignore.
265 if s[0] not in ("'", '"'):
266 if s[0] not in ("'", '"'):
266 yield t
267 yield t
267 continue
268 continue
268
269
269 # String literal. Prefix to make a b'' string.
270 # String literal. Prefix to make a b'' string.
270 yield t._replace(string='b%s' % t.string)
271 yield t._replace(string='b%s' % t.string)
271 continue
272 continue
272
273
273 # Insert compatibility imports at "from __future__ import" line.
274 # Insert compatibility imports at "from __future__ import" line.
274 # No '\n' should be added to preserve line numbers.
275 # No '\n' should be added to preserve line numbers.
275 if (t.type == token.NAME and t.string == 'import' and
276 if (t.type == token.NAME and t.string == 'import' and
276 all(u.type == token.NAME for u in tokens[i - 2:i]) and
277 all(u.type == token.NAME for u in tokens[i - 2:i]) and
277 [u.string for u in tokens[i - 2:i]] == ['from', '__future__']):
278 [u.string for u in tokens[i - 2:i]] == ['from', '__future__']):
278 futureimpline = True
279 futureimpline = True
279 if t.type == token.NEWLINE and futureimpline:
280 if t.type == token.NEWLINE and futureimpline:
280 futureimpline = False
281 futureimpline = False
281 if fullname == 'mercurial.pycompat':
282 if fullname == 'mercurial.pycompat':
282 yield t
283 yield t
283 continue
284 continue
284 r, c = t.start
285 r, c = t.start
285 l = (b'; from mercurial.pycompat import '
286 l = (b'; from mercurial.pycompat import '
286 b'delattr, getattr, hasattr, setattr, xrange, '
287 b'delattr, getattr, hasattr, setattr, xrange, '
287 b'open, unicode\n')
288 b'open, unicode\n')
288 for u in tokenize.tokenize(io.BytesIO(l).readline):
289 for u in tokenize.tokenize(io.BytesIO(l).readline):
289 if u.type in (tokenize.ENCODING, token.ENDMARKER):
290 if u.type in (tokenize.ENCODING, token.ENDMARKER):
290 continue
291 continue
291 yield u._replace(
292 yield u._replace(
292 start=(r, c + u.start[1]), end=(r, c + u.end[1]))
293 start=(r, c + u.start[1]), end=(r, c + u.end[1]))
293 continue
294 continue
294
295
295 # This looks like a function call.
296 # This looks like a function call.
296 if t.type == token.NAME and _isop(i + 1, '('):
297 if t.type == token.NAME and _isop(i + 1, '('):
297 fn = t.string
298 fn = t.string
298
299
299 # *attr() builtins don't accept byte strings to 2nd argument.
300 # *attr() builtins don't accept byte strings to 2nd argument.
300 if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and
301 if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and
301 not _isop(i - 1, '.')):
302 not _isop(i - 1, '.')):
302 arg1idx = _findargnofcall(1)
303 arg1idx = _findargnofcall(1)
303 if arg1idx is not None:
304 if arg1idx is not None:
304 _ensureunicode(arg1idx)
305 _ensureunicode(arg1idx)
305
306
306 # .encode() and .decode() on str/bytes/unicode don't accept
307 # .encode() and .decode() on str/bytes/unicode don't accept
307 # byte strings on Python 3.
308 # byte strings on Python 3.
308 elif fn in ('encode', 'decode') and _isop(i - 1, '.'):
309 elif fn in ('encode', 'decode') and _isop(i - 1, '.'):
309 for argn in range(2):
310 for argn in range(2):
310 argidx = _findargnofcall(argn)
311 argidx = _findargnofcall(argn)
311 if argidx is not None:
312 if argidx is not None:
312 _ensureunicode(argidx)
313 _ensureunicode(argidx)
313
314
314 # It changes iteritems/itervalues to items/values as they are not
315 # It changes iteritems/itervalues to items/values as they are not
315 # present in Python 3 world.
316 # present in Python 3 world.
316 elif fn in ('iteritems', 'itervalues'):
317 elif fn in ('iteritems', 'itervalues'):
317 yield t._replace(string=fn[4:])
318 yield t._replace(string=fn[4:])
318 continue
319 continue
319
320
320 # Emit unmodified token.
321 # Emit unmodified token.
321 yield t
322 yield t
322
323
323 # Header to add to bytecode files. This MUST be changed when
324 # Header to add to bytecode files. This MUST be changed when
324 # ``replacetokens`` or any mechanism that changes semantics of module
325 # ``replacetokens`` or any mechanism that changes semantics of module
325 # loading is changed. Otherwise cached bytecode may get loaded without
326 # loading is changed. Otherwise cached bytecode may get loaded without
326 # the new transformation mechanisms applied.
327 # the new transformation mechanisms applied.
327 BYTECODEHEADER = b'HG\x00\x0a'
328 BYTECODEHEADER = b'HG\x00\x0a'
328
329
329 class hgloader(importlib.machinery.SourceFileLoader):
330 class hgloader(importlib.machinery.SourceFileLoader):
330 """Custom module loader that transforms source code.
331 """Custom module loader that transforms source code.
331
332
332 When the source code is converted to a code object, we transform
333 When the source code is converted to a code object, we transform
333 certain patterns to be Python 3 compatible. This allows us to write code
334 certain patterns to be Python 3 compatible. This allows us to write code
334 that is natively Python 2 and compatible with Python 3 without
335 that is natively Python 2 and compatible with Python 3 without
335 making the code excessively ugly.
336 making the code excessively ugly.
336
337
337 We do this by transforming the token stream between parse and compile.
338 We do this by transforming the token stream between parse and compile.
338
339
339 Implementing transformations invalidates caching assumptions made
340 Implementing transformations invalidates caching assumptions made
340 by the built-in importer. The built-in importer stores a header on
341 by the built-in importer. The built-in importer stores a header on
341 saved bytecode files indicating the Python/bytecode version. If the
342 saved bytecode files indicating the Python/bytecode version. If the
342 version changes, the cached bytecode is ignored. The Mercurial
343 version changes, the cached bytecode is ignored. The Mercurial
343 transformations could change at any time. This means we need to check
344 transformations could change at any time. This means we need to check
344 that cached bytecode was generated with the current transformation
345 that cached bytecode was generated with the current transformation
345 code or there could be a mismatch between cached bytecode and what
346 code or there could be a mismatch between cached bytecode and what
346 would be generated from this class.
347 would be generated from this class.
347
348
348 We supplement the bytecode caching layer by wrapping ``get_data``
349 We supplement the bytecode caching layer by wrapping ``get_data``
349 and ``set_data``. These functions are called when the
350 and ``set_data``. These functions are called when the
350 ``SourceFileLoader`` retrieves and saves bytecode cache files,
351 ``SourceFileLoader`` retrieves and saves bytecode cache files,
351 respectively. We simply add an additional header on the file. As
352 respectively. We simply add an additional header on the file. As
352 long as the version in this file is changed when semantics change,
353 long as the version in this file is changed when semantics change,
353 cached bytecode should be invalidated when transformations change.
354 cached bytecode should be invalidated when transformations change.
354
355
355 The added header has the form ``HG<VERSION>``. That is a literal
356 The added header has the form ``HG<VERSION>``. That is a literal
356 ``HG`` with 2 binary bytes indicating the transformation version.
357 ``HG`` with 2 binary bytes indicating the transformation version.
357 """
358 """
358 def get_data(self, path):
359 def get_data(self, path):
359 data = super(hgloader, self).get_data(path)
360 data = super(hgloader, self).get_data(path)
360
361
361 if not path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):
362 if not path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):
362 return data
363 return data
363
364
364 # There should be a header indicating the Mercurial transformation
365 # There should be a header indicating the Mercurial transformation
365 # version. If it doesn't exist or doesn't match the current version,
366 # version. If it doesn't exist or doesn't match the current version,
366 # we raise an OSError because that is what
367 # we raise an OSError because that is what
367 # ``SourceFileLoader.get_code()`` expects when loading bytecode
368 # ``SourceFileLoader.get_code()`` expects when loading bytecode
368 # paths to indicate the cached file is "bad."
369 # paths to indicate the cached file is "bad."
369 if data[0:2] != b'HG':
370 if data[0:2] != b'HG':
370 raise OSError('no hg header')
371 raise OSError('no hg header')
371 if data[0:4] != BYTECODEHEADER:
372 if data[0:4] != BYTECODEHEADER:
372 raise OSError('hg header version mismatch')
373 raise OSError('hg header version mismatch')
373
374
374 return data[4:]
375 return data[4:]
375
376
376 def set_data(self, path, data, *args, **kwargs):
377 def set_data(self, path, data, *args, **kwargs):
377 if path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):
378 if path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):
378 data = BYTECODEHEADER + data
379 data = BYTECODEHEADER + data
379
380
380 return super(hgloader, self).set_data(path, data, *args, **kwargs)
381 return super(hgloader, self).set_data(path, data, *args, **kwargs)
381
382
382 def source_to_code(self, data, path):
383 def source_to_code(self, data, path):
383 """Perform token transformation before compilation."""
384 """Perform token transformation before compilation."""
384 buf = io.BytesIO(data)
385 buf = io.BytesIO(data)
385 tokens = tokenize.tokenize(buf.readline)
386 tokens = tokenize.tokenize(buf.readline)
386 data = tokenize.untokenize(replacetokens(list(tokens), self.name))
387 data = tokenize.untokenize(replacetokens(list(tokens), self.name))
387 # Python's built-in importer strips frames from exceptions raised
388 # Python's built-in importer strips frames from exceptions raised
388 # for this code. Unfortunately, that mechanism isn't extensible
389 # for this code. Unfortunately, that mechanism isn't extensible
389 # and our frame will be blamed for the import failure. There
390 # and our frame will be blamed for the import failure. There
390 # are extremely hacky ways to do frame stripping. We haven't
391 # are extremely hacky ways to do frame stripping. We haven't
391 # implemented them because they are very ugly.
392 # implemented them because they are very ugly.
392 return super(hgloader, self).source_to_code(data, path)
393 return super(hgloader, self).source_to_code(data, path)
393
394
394 # We automagically register our custom importer as a side-effect of loading.
395 # We automagically register our custom importer as a side-effect of loading.
395 # This is necessary to ensure that any entry points are able to import
396 # This is necessary to ensure that any entry points are able to import
396 # mercurial.* modules without having to perform this registration themselves.
397 # mercurial.* modules without having to perform this registration themselves.
397 if sys.version_info[0] >= 3:
398 if sys.version_info[0] >= 3:
398 _importercls = hgpathentryfinder
399 _importercls = hgpathentryfinder
399 else:
400 else:
400 _importercls = hgimporter
401 _importercls = hgimporter
401 if not any(isinstance(x, _importercls) for x in sys.meta_path):
402 if not any(isinstance(x, _importercls) for x in sys.meta_path):
402 # meta_path is used before any implicit finders and before sys.path.
403 # meta_path is used before any implicit finders and before sys.path.
403 sys.meta_path.insert(0, _importercls())
404 sys.meta_path.insert(0, _importercls())
General Comments 0
You need to be logged in to leave comments. Login now