flagutil: create a `mercurial.revlogutils.flagutil` module...
marmoute - r42954:ca5ca3ba default
mercurial/revlogutils/flagutil.py (new file)
@@ -0,0 +1,31 @@
+# flagutil.py - code to deal with revlog flags and their processors
+#
+# Copyright 2016 Remi Chaintron <remi@fb.com>
+# Copyright 2016-2019 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+from .constants import (
+    REVIDX_DEFAULT_FLAGS,
+    REVIDX_ELLIPSIS,
+    REVIDX_EXTSTORED,
+    REVIDX_FLAGS_ORDER,
+    REVIDX_ISCENSORED,
+    REVIDX_KNOWN_FLAGS,
+    REVIDX_RAWTEXT_CHANGING_FLAGS,
+)
+
+# blanked usage of all the names to prevent pyflakes complaints
+# We need these names available in the module for extensions.
+REVIDX_ISCENSORED
+REVIDX_ELLIPSIS
+REVIDX_EXTSTORED
+REVIDX_DEFAULT_FLAGS
+REVIDX_FLAGS_ORDER
+REVIDX_KNOWN_FLAGS
+REVIDX_RAWTEXT_CHANGING_FLAGS
+
+
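The bare name statements at the end of the new module are deliberate: they re-export the REVIDX_* constants so extensions have a single stable location to import flag-handling names from, while keeping pyflakes from flagging the imports as unused. As a minimal sketch of the consumer side (the extension code below is hypothetical and not part of this commit; it only assumes the module added above):

    # hypothetical extension snippet
    from mercurial.revlogutils import flagutil

    def uisetup(ui):
        # every usable flag bit is part of REVIDX_KNOWN_FLAGS, so an
        # extension can sanity-check a flag it cares about at load time
        assert flagutil.REVIDX_ELLIPSIS & flagutil.REVIDX_KNOWN_FLAGS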

contrib/import-checker.py
@@ -1,747 +1,748 @@
 #!/usr/bin/env python
 
 from __future__ import absolute_import, print_function
 
 import ast
 import collections
 import os
 import sys
 
 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
 # to work when run from a virtualenv. The modules were chosen empirically
 # so that the return value matches the return value without virtualenv.
 if True: # disable lexical sorting checks
     try:
         import BaseHTTPServer as basehttpserver
     except ImportError:
         basehttpserver = None
     import zlib
 
 import testparseutil
 
 # Whitelist of modules that symbols can be directly imported from.
 allowsymbolimports = (
     '__future__',
     'bzrlib',
     'hgclient',
     'mercurial',
     'mercurial.hgweb.common',
     'mercurial.hgweb.request',
     'mercurial.i18n',
     'mercurial.node',
     # for revlog to re-export constants to extensions
     'mercurial.revlogutils.constants',
+    'mercurial.revlogutils.flagutil',
     # for cffi modules to re-export pure functions
     'mercurial.pure.base85',
     'mercurial.pure.bdiff',
     'mercurial.pure.mpatch',
     'mercurial.pure.osutil',
     'mercurial.pure.parsers',
     # third-party imports should be directly imported
     'mercurial.thirdparty',
     'mercurial.thirdparty.attr',
     'mercurial.thirdparty.zope',
     'mercurial.thirdparty.zope.interface',
 )
 
 # Whitelist of symbols that can be directly imported.
 directsymbols = (
     'demandimport',
 )
 
 # Modules that must be aliased because they are commonly confused with
 # common variables and can create aliasing and readability issues.
 requirealias = {
     'ui': 'uimod',
 }
 
 def usingabsolute(root):
     """Whether absolute imports are being used."""
     if sys.version_info[0] >= 3:
         return True
 
     for node in ast.walk(root):
         if isinstance(node, ast.ImportFrom):
             if node.module == '__future__':
                 for n in node.names:
                     if n.name == 'absolute_import':
                         return True
 
     return False
 
 def walklocal(root):
     """Recursively yield all descendant nodes but not in a different scope"""
     todo = collections.deque(ast.iter_child_nodes(root))
     yield root, False
     while todo:
         node = todo.popleft()
         newscope = isinstance(node, ast.FunctionDef)
         if not newscope:
             todo.extend(ast.iter_child_nodes(node))
         yield node, newscope
 
 def dotted_name_of_path(path):
     """Given a relative path to a source file, return its dotted module name.
 
     >>> dotted_name_of_path('mercurial/error.py')
     'mercurial.error'
     >>> dotted_name_of_path('zlibmodule.so')
     'zlib'
     """
     parts = path.replace(os.sep, '/').split('/')
     parts[-1] = parts[-1].split('.', 1)[0] # remove .py and .so and .ARCH.so
     if parts[-1].endswith('module'):
         parts[-1] = parts[-1][:-6]
     return '.'.join(parts)
 
 def fromlocalfunc(modulename, localmods):
     """Get a function to examine which locally defined module the
     target source imports via a specified name.
 
     `modulename` is a `dotted_name_of_path()`-ed source file path,
     which may have `.__init__` at the end of it, of the target source.
 
     `localmods` is a set of absolute `dotted_name_of_path()`-ed source file
     paths of locally defined (= Mercurial specific) modules.
 
     This function assumes that module names not existing in
     `localmods` are from the Python standard library.
 
     This function returns a function, which takes a `name` argument,
     and returns a `(absname, dottedpath, hassubmod)` tuple if `name`
     matches a locally defined module. Otherwise, it returns
     False.
 
     It is assumed that `name` doesn't have `.__init__`.
 
     `absname` is an absolute module name of the specified `name`
     (e.g. "hgext.convert"). This can be used to compose prefixes for
     submodules.
 
     `dottedpath` is a `dotted_name_of_path()`-ed source file path
     (e.g. "hgext.convert.__init__") of `name`. This is used to look
     the module up in `localmods` again.
 
     `hassubmod` is whether it may have submodules under it (for
     convenience, even though this is also equivalent to "absname !=
     dottedpath")
 
     >>> localmods = {'foo.__init__', 'foo.foo1',
     ...              'foo.bar.__init__', 'foo.bar.bar1',
     ...              'baz.__init__', 'baz.baz1'}
     >>> fromlocal = fromlocalfunc('foo.xxx', localmods)
     >>> # relative
     >>> fromlocal('foo1')
     ('foo.foo1', 'foo.foo1', False)
     >>> fromlocal('bar')
     ('foo.bar', 'foo.bar.__init__', True)
     >>> fromlocal('bar.bar1')
     ('foo.bar.bar1', 'foo.bar.bar1', False)
     >>> # absolute
     >>> fromlocal('baz')
     ('baz', 'baz.__init__', True)
     >>> fromlocal('baz.baz1')
     ('baz.baz1', 'baz.baz1', False)
     >>> # unknown = maybe standard library
     >>> fromlocal('os')
     False
     >>> fromlocal(None, 1)
     ('foo', 'foo.__init__', True)
     >>> fromlocal('foo1', 1)
     ('foo.foo1', 'foo.foo1', False)
     >>> fromlocal2 = fromlocalfunc('foo.xxx.yyy', localmods)
     >>> fromlocal2(None, 2)
     ('foo', 'foo.__init__', True)
     >>> fromlocal2('bar2', 1)
     False
     >>> fromlocal2('bar', 2)
     ('foo.bar', 'foo.bar.__init__', True)
     """
     if not isinstance(modulename, str):
         modulename = modulename.decode('ascii')
     prefix = '.'.join(modulename.split('.')[:-1])
     if prefix:
         prefix += '.'
     def fromlocal(name, level=0):
         # name is false value when relative imports are used.
         if not name:
             # If relative imports are used, level must not be absolute.
             assert level > 0
             candidates = ['.'.join(modulename.split('.')[:-level])]
         else:
             if not level:
                 # Check relative name first.
                 candidates = [prefix + name, name]
             else:
                 candidates = ['.'.join(modulename.split('.')[:-level]) +
                               '.' + name]
 
         for n in candidates:
             if n in localmods:
                 return (n, n, False)
             dottedpath = n + '.__init__'
             if dottedpath in localmods:
                 return (n, dottedpath, True)
         return False
     return fromlocal
 
 def populateextmods(localmods):
     """Populate C extension modules based on pure modules"""
     newlocalmods = set(localmods)
     for n in localmods:
         if n.startswith('mercurial.pure.'):
             m = n[len('mercurial.pure.'):]
             newlocalmods.add('mercurial.cext.' + m)
             newlocalmods.add('mercurial.cffi._' + m)
     return newlocalmods
 
 def list_stdlib_modules():
     """List the modules present in the stdlib.
 
     >>> py3 = sys.version_info[0] >= 3
     >>> mods = set(list_stdlib_modules())
     >>> 'BaseHTTPServer' in mods or py3
     True
 
     os.path isn't really a module, so it's missing:
 
     >>> 'os.path' in mods
     False
 
     sys requires special treatment, because it's baked into the
     interpreter, but it should still appear:
 
     >>> 'sys' in mods
     True
 
     >>> 'collections' in mods
     True
 
     >>> 'cStringIO' in mods or py3
     True
 
     >>> 'cffi' in mods
     True
     """
     for m in sys.builtin_module_names:
         yield m
     # These modules only exist on windows, but we should always
     # consider them stdlib.
     for m in ['msvcrt', '_winreg']:
         yield m
     yield '__builtin__'
     yield 'builtins' # python3 only
     yield 'importlib.abc' # python3 only
     yield 'importlib.machinery' # python3 only
     yield 'importlib.util' # python3 only
     for m in 'fcntl', 'grp', 'pwd', 'termios': # Unix only
         yield m
     for m in 'cPickle', 'datetime': # in Python (not C) on PyPy
         yield m
     for m in ['cffi']:
         yield m
     stdlib_prefixes = {sys.prefix, sys.exec_prefix}
     # We need to supplement the list of prefixes for the search to work
     # when run from within a virtualenv.
     for mod in (basehttpserver, zlib):
         if mod is None:
             continue
         try:
             # Not all module objects have a __file__ attribute.
             filename = mod.__file__
         except AttributeError:
             continue
         dirname = os.path.dirname(filename)
         for prefix in stdlib_prefixes:
             if dirname.startswith(prefix):
                 # Then this directory is redundant.
                 break
         else:
             stdlib_prefixes.add(dirname)
     sourceroot = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
     for libpath in sys.path:
         # We want to walk everything in sys.path that starts with something in
         # stdlib_prefixes, but not directories from the hg sources.
         if (os.path.abspath(libpath).startswith(sourceroot)
             or not any(libpath.startswith(p) for p in stdlib_prefixes)):
             continue
         for top, dirs, files in os.walk(libpath):
             for i, d in reversed(list(enumerate(dirs))):
                 if (not os.path.exists(os.path.join(top, d, '__init__.py'))
                     or top == libpath and d in ('hgdemandimport', 'hgext',
                                                 'mercurial')):
                     del dirs[i]
             for name in files:
                 if not name.endswith(('.py', '.so', '.pyc', '.pyo', '.pyd')):
                     continue
                 if name.startswith('__init__.py'):
                     full_path = top
                 else:
                     full_path = os.path.join(top, name)
                 rel_path = full_path[len(libpath) + 1:]
                 mod = dotted_name_of_path(rel_path)
                 yield mod
 
 stdlib_modules = set(list_stdlib_modules())
 
 def imported_modules(source, modulename, f, localmods, ignore_nested=False):
     """Given the source of a file as a string, yield the names
     imported by that file.
 
     Args:
       source: The python source to examine as a string.
       modulename: of specified python source (may have `__init__`)
       localmods: set of locally defined module names (may have `__init__`)
       ignore_nested: If true, import statements that do not start in
                      column zero will be ignored.
 
     Returns:
       A list of absolute module names imported by the given source.
 
     >>> f = 'foo/xxx.py'
     >>> modulename = 'foo.xxx'
     >>> localmods = {'foo.__init__': True,
     ...              'foo.foo1': True, 'foo.foo2': True,
     ...              'foo.bar.__init__': True, 'foo.bar.bar1': True,
     ...              'baz.__init__': True, 'baz.baz1': True }
     >>> # standard library (= not locally defined ones)
     >>> sorted(imported_modules(
     ...        'from stdlib1 import foo, bar; import stdlib2',
     ...        modulename, f, localmods))
     []
     >>> # relative importing
     >>> sorted(imported_modules(
     ...        'import foo1; from bar import bar1',
     ...        modulename, f, localmods))
     ['foo.bar.bar1', 'foo.foo1']
     >>> sorted(imported_modules(
     ...        'from bar.bar1 import name1, name2, name3',
     ...        modulename, f, localmods))
     ['foo.bar.bar1']
     >>> # absolute importing
     >>> sorted(imported_modules(
     ...        'from baz import baz1, name1',
     ...        modulename, f, localmods))
     ['baz.__init__', 'baz.baz1']
     >>> # mixed importing, even though it shouldn't be recommended
     >>> sorted(imported_modules(
     ...        'import stdlib, foo1, baz',
     ...        modulename, f, localmods))
     ['baz.__init__', 'foo.foo1']
     >>> # ignore_nested
     >>> sorted(imported_modules(
     ...        '''import foo
     ... def wat():
     ...     import bar
     ... ''', modulename, f, localmods))
     ['foo.__init__', 'foo.bar.__init__']
     >>> sorted(imported_modules(
     ...        '''import foo
     ... def wat():
     ...     import bar
     ... ''', modulename, f, localmods, ignore_nested=True))
     ['foo.__init__']
     """
     fromlocal = fromlocalfunc(modulename, localmods)
     for node in ast.walk(ast.parse(source, f)):
         if ignore_nested and getattr(node, 'col_offset', 0) > 0:
             continue
         if isinstance(node, ast.Import):
             for n in node.names:
                 found = fromlocal(n.name)
                 if not found:
                     # this should import standard library
                     continue
                 yield found[1]
         elif isinstance(node, ast.ImportFrom):
             found = fromlocal(node.module, node.level)
             if not found:
                 # this should import standard library
                 continue
 
             absname, dottedpath, hassubmod = found
             if not hassubmod:
                 # "dottedpath" is not a package; must be imported
                 yield dottedpath
                 # examination of "node.names" should be redundant
                 # e.g.: from mercurial.node import nullid, nullrev
                 continue
 
             modnotfound = False
             prefix = absname + '.'
             for n in node.names:
                 found = fromlocal(prefix + n.name)
                 if not found:
                     # this should be a function or a property of "node.module"
                     modnotfound = True
                     continue
                 yield found[1]
             if modnotfound:
                 # "dottedpath" is a package, but imported because of non-module
                 # lookup
                 yield dottedpath
 
 def verify_import_convention(module, source, localmods):
     """Verify imports match our established coding convention.
 
     We have 2 conventions: legacy and modern. The modern convention is in
     effect when using absolute imports.
 
     The legacy convention only looks for mixed imports. The modern convention
     is much more thorough.
     """
     root = ast.parse(source)
     absolute = usingabsolute(root)
 
     if absolute:
         return verify_modern_convention(module, root, localmods)
     else:
         return verify_stdlib_on_own_line(root)
 
 def verify_modern_convention(module, root, localmods, root_col_offset=0):
     """Verify a file conforms to the modern import convention rules.
 
     The rules of the modern convention are:
 
     * Ordering is stdlib followed by local imports. Each group is lexically
       sorted.
     * Importing multiple modules via "import X, Y" is not allowed: use
       separate import statements.
     * Importing multiple modules via "from X import ..." is allowed if using
       parentheses and one entry per line.
     * Only 1 relative import statement per import level ("from .", "from ..")
       is allowed.
     * Relative imports from higher levels must occur before lower levels. e.g.
       "from .." must be before "from .".
     * Imports from peer packages should use relative import (e.g. do not
       "import mercurial.foo" from a "mercurial.*" module).
     * Symbols can only be imported from specific modules (see
       `allowsymbolimports`). For other modules, first import the module then
       assign the symbol to a module-level variable. In addition, these imports
       must be performed before other local imports. This rule only
       applies to import statements outside of any blocks.
     * Relative imports from the standard library are not allowed, unless that
       library is also a local module.
     * Certain modules must be aliased to alternate names to avoid aliasing
       and readability problems. See `requirealias`.
     """
     if not isinstance(module, str):
         module = module.decode('ascii')
     topmodule = module.split('.')[0]
     fromlocal = fromlocalfunc(module, localmods)
 
     # Whether a local/non-stdlib import has been performed.
     seenlocal = None
     # Whether a local/non-stdlib, non-symbol import has been seen.
     seennonsymbollocal = False
     # The last name to be imported (for sorting).
     lastname = None
     laststdlib = None
     # Relative import levels encountered so far.
     seenlevels = set()
 
     for node, newscope in walklocal(root):
         def msg(fmt, *args):
             return (fmt % args, node.lineno)
         if newscope:
             # Check for local imports in function
             for r in verify_modern_convention(module, node, localmods,
                                               node.col_offset + 4):
                 yield r
         elif isinstance(node, ast.Import):
             # Disallow "import foo, bar" and require separate imports
             # for each module.
             if len(node.names) > 1:
                 yield msg('multiple imported names: %s',
                           ', '.join(n.name for n in node.names))
 
             name = node.names[0].name
             asname = node.names[0].asname
 
             stdlib = name in stdlib_modules
 
             # Ignore sorting rules on imports inside blocks.
             if node.col_offset == root_col_offset:
                 if lastname and name < lastname and laststdlib == stdlib:
                     yield msg('imports not lexically sorted: %s < %s',
                               name, lastname)
 
             lastname = name
             laststdlib = stdlib
 
             # stdlib imports should be before local imports.
             if stdlib and seenlocal and node.col_offset == root_col_offset:
                 yield msg('stdlib import "%s" follows local import: %s',
                           name, seenlocal)
 
             if not stdlib:
                 seenlocal = name
 
             # Import of sibling modules should use relative imports.
             topname = name.split('.')[0]
             if topname == topmodule:
                 yield msg('import should be relative: %s', name)
 
             if name in requirealias and asname != requirealias[name]:
                 yield msg('%s module must be "as" aliased to %s',
                           name, requirealias[name])
 
         elif isinstance(node, ast.ImportFrom):
             # Resolve the full imported module name.
             if node.level > 0:
                 fullname = '.'.join(module.split('.')[:-node.level])
                 if node.module:
                     fullname += '.%s' % node.module
             else:
                 assert node.module
                 fullname = node.module
 
             topname = fullname.split('.')[0]
             if topname == topmodule:
                 yield msg('import should be relative: %s', fullname)
 
             # __future__ is special since it needs to come first and use
             # symbol import.
             if fullname != '__future__':
                 if not fullname or (
                         fullname in stdlib_modules
                         and fullname not in localmods
                         and fullname + '.__init__' not in localmods):
                     yield msg('relative import of stdlib module')
                 else:
                     seenlocal = fullname
 
             # Direct symbol import is only allowed from certain modules and
             # must occur before non-symbol imports.
             found = fromlocal(node.module, node.level)
             if found and found[2]: # node.module is a package
                 prefix = found[0] + '.'
                 symbols = (n.name for n in node.names
                            if not fromlocal(prefix + n.name))
             else:
                 symbols = (n.name for n in node.names)
             symbols = [sym for sym in symbols if sym not in directsymbols]
             if node.module and node.col_offset == root_col_offset:
                 if symbols and fullname not in allowsymbolimports:
                     yield msg('direct symbol import %s from %s',
                               ', '.join(symbols), fullname)
 
                 if symbols and seennonsymbollocal:
                     yield msg('symbol import follows non-symbol import: %s',
                               fullname)
             if not symbols and fullname not in stdlib_modules:
                 seennonsymbollocal = True
 
             if not node.module:
                 assert node.level
 
                 # Only allow 1 group per level.
                 if (node.level in seenlevels
                     and node.col_offset == root_col_offset):
                     yield msg('multiple "from %s import" statements',
                               '.' * node.level)
 
                 # Higher-level groups come before lower-level groups.
                 if any(node.level > l for l in seenlevels):
                     yield msg('higher-level import should come first: %s',
                               fullname)
 
                 seenlevels.add(node.level)
 
             # Entries in "from .X import ( ... )" lists must be lexically
             # sorted.
             lastentryname = None
 
             for n in node.names:
                 if lastentryname and n.name < lastentryname:
                     yield msg('imports from %s not lexically sorted: %s < %s',
                               fullname, n.name, lastentryname)
 
                 lastentryname = n.name
 
                 if n.name in requirealias and n.asname != requirealias[n.name]:
                     yield msg('%s from %s must be "as" aliased to %s',
                               n.name, fullname, requirealias[n.name])
 
 def verify_stdlib_on_own_line(root):
     """Given some python source, verify that stdlib imports are done
     in separate statements from relative local module imports.
 
     >>> list(verify_stdlib_on_own_line(ast.parse('import sys, foo')))
     [('mixed imports\\n stdlib: sys\\n relative: foo', 1)]
     >>> list(verify_stdlib_on_own_line(ast.parse('import sys, os')))
     []
     >>> list(verify_stdlib_on_own_line(ast.parse('import foo, bar')))
     []
     """
     for node in ast.walk(root):
         if isinstance(node, ast.Import):
             from_stdlib = {False: [], True: []}
             for n in node.names:
                 from_stdlib[n.name in stdlib_modules].append(n.name)
             if from_stdlib[True] and from_stdlib[False]:
                 yield ('mixed imports\n stdlib: %s\n relative: %s' %
                        (', '.join(sorted(from_stdlib[True])),
                         ', '.join(sorted(from_stdlib[False]))), node.lineno)
 
 class CircularImport(Exception):
     pass
 
 def checkmod(mod, imports):
     shortest = {}
     visit = [[mod]]
     while visit:
         path = visit.pop(0)
         for i in sorted(imports.get(path[-1], [])):
             if len(path) < shortest.get(i, 1000):
                 shortest[i] = len(path)
                 if i in path:
                     if i == path[0]:
                         raise CircularImport(path)
                     continue
                 visit.append(path + [i])
 
 def rotatecycle(cycle):
     """arrange a cycle so that the lexicographically first module is listed first
 
     >>> rotatecycle(['foo', 'bar'])
     ['bar', 'foo', 'bar']
     """
     lowest = min(cycle)
     idx = cycle.index(lowest)
     return cycle[idx:] + cycle[:idx] + [lowest]
 
 def find_cycles(imports):
     """Find cycles in an already-loaded import graph.
 
     All module names recorded in `imports` should be absolute ones.
 
     >>> from __future__ import print_function
     >>> imports = {'top.foo': ['top.bar', 'os.path', 'top.qux'],
     ...            'top.bar': ['top.baz', 'sys'],
     ...            'top.baz': ['top.foo'],
     ...            'top.qux': ['top.foo']}
     >>> print('\\n'.join(sorted(find_cycles(imports))))
     top.bar -> top.baz -> top.foo -> top.bar
     top.foo -> top.qux -> top.foo
     """
     cycles = set()
     for mod in sorted(imports.keys()):
         try:
             checkmod(mod, imports)
         except CircularImport as e:
             cycle = e.args[0]
             cycles.add(" -> ".join(rotatecycle(cycle)))
     return cycles
 
 def _cycle_sortkey(c):
     return len(c), c
 
 def embedded(f, modname, src):
     """Extract embedded python code
 
     >>> def _forcestr(thing):
     ...     if not isinstance(thing, str):
     ...         return thing.decode('ascii')
     ...     return thing
     >>> def test(fn, lines):
     ...     for s, m, f, l in embedded(fn, b"example", lines):
     ...         print("%s %s %d" % (_forcestr(m), _forcestr(f), l))
     ...         print(repr(_forcestr(s)))
     >>> lines = [
     ...   'comment',
     ...   '  >>> from __future__ import print_function',
     ...   "  >>> ' multiline",
     ...   "  ... string'",
     ...   '  ',
     ...   'comment',
     ...   '  $ cat > foo.py <<EOF',
     ...   '  > from __future__ import print_function',
     ...   '  > EOF',
     ... ]
     >>> test(b"example.t", lines)
     example[2] doctest.py 1
     "from __future__ import print_function\\n' multiline\\nstring'\\n\\n"
     example[8] foo.py 7
     'from __future__ import print_function\\n'
     """
     errors = []
     for name, starts, ends, code in testparseutil.pyembedded(f, src, errors):
         if not name:
             # use 'doctest.py', in order to make already existing
             # doctest above pass instantly
             name = 'doctest.py'
         # "starts" is "line number" (1-origin), but embedded() is
         # expected to return "line offset" (0-origin). Therefore, this
         # yields "starts - 1".
         if not isinstance(modname, str):
             modname = modname.decode('utf8')
         yield code, "%s[%d]" % (modname, starts), name, starts - 1
 
 def sources(f, modname):
     """Yields possibly multiple sources from a filepath
 
     input: filepath, modulename
     yields: script(string), modulename, filepath, linenumber
 
     For embedded scripts, the modulename and filepath will be different
     from the function arguments. linenumber is an offset relative to
     the input file.
     """
     py = False
     if not f.endswith('.t'):
         with open(f, 'rb') as src:
             yield src.read(), modname, f, 0
             py = True
     if py or f.endswith('.t'):
         with open(f, 'r') as src:
             for script, modname, t, line in embedded(f, modname, src):
                 yield script, modname.encode('utf8'), t, line
 
 def main(argv):
     if len(argv) < 2 or (argv[1] == '-' and len(argv) > 2):
         print('Usage: %s {-|file [file] [file] ...}')
         return 1
     if argv[1] == '-':
         argv = argv[:1]
         argv.extend(l.rstrip() for l in sys.stdin.readlines())
     localmodpaths = {}
     used_imports = {}
     any_errors = False
     for source_path in argv[1:]:
         modname = dotted_name_of_path(source_path)
         localmodpaths[modname] = source_path
     localmods = populateextmods(localmodpaths)
     for localmodname, source_path in sorted(localmodpaths.items()):
         if not isinstance(localmodname, bytes):
             # This is only safe because all hg's files are ascii
             localmodname = localmodname.encode('ascii')
         for src, modname, name, line in sources(source_path, localmodname):
             try:
                 used_imports[modname] = sorted(
                     imported_modules(src, modname, name, localmods,
                                      ignore_nested=True))
                 for error, lineno in verify_import_convention(modname, src,
                                                               localmods):
                     any_errors = True
                     print('%s:%d: %s' % (source_path, lineno + line, error))
             except SyntaxError as e:
                 print('%s:%d: SyntaxError: %s' %
                       (source_path, e.lineno + line, e))
     cycles = find_cycles(used_imports)
     if cycles:
         firstmods = set()
         for c in sorted(cycles, key=_cycle_sortkey):
             first = c.split()[0]
             # As a rough cut, ignore any cycle that starts with the
             # same module as some other cycle. Otherwise we see lots
             # of cycles that are effectively duplicates.
             if first in firstmods:
                 continue
             print('Import cycle:', c)
             firstmods.add(first)
             any_errors = True
     return any_errors != 0
 
 if __name__ == '__main__':
     sys.exit(int(main(sys.argv)))
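The one-line whitelist addition above is what lets the rest of this commit pass the checker: the next hunk has revlog.py do a direct symbol import of the REVIDX_* names from mercurial.revlogutils.flagutil, and verify_modern_convention() only permits that for modules listed in allowsymbolimports. Without the new entry, a run of the checker would report something along these lines (output reconstructed from the 'direct symbol import %s from %s' message format above, so the exact line number is illustrative):

    $ python contrib/import-checker.py mercurial/revlog.py
    mercurial/revlog.py:50: direct symbol import REVIDX_DEFAULT_FLAGS, REVIDX_ELLIPSIS, REVIDX_EXTSTORED, REVIDX_FLAGS_ORDER, REVIDX_ISCENSORED, REVIDX_KNOWN_FLAGS, REVIDX_RAWTEXT_CHANGING_FLAGS from mercurial.revlogutils.flagutil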

mercurial/revlog.py
@@ -1,2702 +1,2704 @@
 # revlog.py - storage back-end for mercurial
 #
 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 """Storage back-end for Mercurial.
 
 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches.
 """
 
 from __future__ import absolute_import
 
 import collections
 import contextlib
 import errno
 import io
 import os
 import struct
 import zlib
 
 # import stuff from node for others to import from revlog
 from .node import (
     bin,
     hex,
     nullhex,
     nullid,
     nullrev,
     short,
     wdirfilenodeids,
     wdirhex,
     wdirid,
     wdirrev,
 )
 from .i18n import _
 from .revlogutils.constants import (
     FLAG_GENERALDELTA,
     FLAG_INLINE_DATA,
-    REVIDX_DEFAULT_FLAGS,
-    REVIDX_ELLIPSIS,
-    REVIDX_EXTSTORED,
-    REVIDX_FLAGS_ORDER,
-    REVIDX_ISCENSORED,
-    REVIDX_KNOWN_FLAGS,
-    REVIDX_RAWTEXT_CHANGING_FLAGS,
     REVLOGV0,
     REVLOGV1,
     REVLOGV1_FLAGS,
     REVLOGV2,
     REVLOGV2_FLAGS,
     REVLOG_DEFAULT_FLAGS,
     REVLOG_DEFAULT_FORMAT,
     REVLOG_DEFAULT_VERSION,
 )
+from .revlogutils.flagutil import (
+    REVIDX_DEFAULT_FLAGS,
+    REVIDX_ELLIPSIS,
+    REVIDX_EXTSTORED,
+    REVIDX_FLAGS_ORDER,
+    REVIDX_ISCENSORED,
+    REVIDX_KNOWN_FLAGS,
+    REVIDX_RAWTEXT_CHANGING_FLAGS,
+)
57 from .thirdparty import (
59 from .thirdparty import (
58 attr,
60 attr,
59 )
61 )
60 from . import (
62 from . import (
61 ancestor,
63 ancestor,
62 dagop,
64 dagop,
63 error,
65 error,
64 mdiff,
66 mdiff,
65 policy,
67 policy,
66 pycompat,
68 pycompat,
67 repository,
69 repository,
68 templatefilters,
70 templatefilters,
69 util,
71 util,
70 )
72 )
71 from .revlogutils import (
73 from .revlogutils import (
72 deltas as deltautil,
74 deltas as deltautil,
73 )
75 )
74 from .utils import (
76 from .utils import (
75 interfaceutil,
77 interfaceutil,
76 storageutil,
78 storageutil,
77 stringutil,
79 stringutil,
78 )
80 )
79
81
80 # blanked usage of all the name to prevent pyflakes constraints
82 # blanked usage of all the name to prevent pyflakes constraints
81 # We need these name available in the module for extensions.
83 # We need these name available in the module for extensions.
82 REVLOGV0
84 REVLOGV0
83 REVLOGV1
85 REVLOGV1
84 REVLOGV2
86 REVLOGV2
85 FLAG_INLINE_DATA
87 FLAG_INLINE_DATA
86 FLAG_GENERALDELTA
88 FLAG_GENERALDELTA
87 REVLOG_DEFAULT_FLAGS
89 REVLOG_DEFAULT_FLAGS
88 REVLOG_DEFAULT_FORMAT
90 REVLOG_DEFAULT_FORMAT
89 REVLOG_DEFAULT_VERSION
91 REVLOG_DEFAULT_VERSION
90 REVLOGV1_FLAGS
92 REVLOGV1_FLAGS
91 REVLOGV2_FLAGS
93 REVLOGV2_FLAGS
92 REVIDX_ISCENSORED
94 REVIDX_ISCENSORED
93 REVIDX_ELLIPSIS
95 REVIDX_ELLIPSIS
94 REVIDX_EXTSTORED
96 REVIDX_EXTSTORED
95 REVIDX_DEFAULT_FLAGS
97 REVIDX_DEFAULT_FLAGS
96 REVIDX_FLAGS_ORDER
98 REVIDX_FLAGS_ORDER
97 REVIDX_KNOWN_FLAGS
99 REVIDX_KNOWN_FLAGS
98 REVIDX_RAWTEXT_CHANGING_FLAGS
100 REVIDX_RAWTEXT_CHANGING_FLAGS
99
101
100 parsers = policy.importmod(r'parsers')
102 parsers = policy.importmod(r'parsers')
101 rustancestor = policy.importrust(r'ancestor')
103 rustancestor = policy.importrust(r'ancestor')
102 rustdagop = policy.importrust(r'dagop')
104 rustdagop = policy.importrust(r'dagop')
103
105
104 # Aliased for performance.
106 # Aliased for performance.
105 _zlibdecompress = zlib.decompress
107 _zlibdecompress = zlib.decompress
106
108
107 # max size of revlog with inline data
109 # max size of revlog with inline data
108 _maxinline = 131072
110 _maxinline = 131072
109 _chunksize = 1048576
111 _chunksize = 1048576
110
112
111 # Store flag processors (cf. 'addflagprocessor()' to register)
113 # Store flag processors (cf. 'addflagprocessor()' to register)
112 _flagprocessors = {
114 _flagprocessors = {
113 REVIDX_ISCENSORED: None,
115 REVIDX_ISCENSORED: None,
114 }
116 }
115
117
116 # Flag processors for REVIDX_ELLIPSIS.
118 # Flag processors for REVIDX_ELLIPSIS.
117 def ellipsisreadprocessor(rl, text):
119 def ellipsisreadprocessor(rl, text):
118 return text, False
120 return text, False
119
121
120 def ellipsiswriteprocessor(rl, text):
122 def ellipsiswriteprocessor(rl, text):
121 return text, False
123 return text, False
122
124
123 def ellipsisrawprocessor(rl, text):
125 def ellipsisrawprocessor(rl, text):
124 return False
126 return False
125
127
126 ellipsisprocessor = (
128 ellipsisprocessor = (
127 ellipsisreadprocessor,
129 ellipsisreadprocessor,
128 ellipsiswriteprocessor,
130 ellipsiswriteprocessor,
129 ellipsisrawprocessor,
131 ellipsisrawprocessor,
130 )
132 )
131
133
def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read) f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw) f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate the hash. If
      "write" returns True, that basically means "rawtext" returned by "write"
      should be used to generate the hash. Usually, "write" and "read" return
      different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    _insertflagprocessor(flag, processor, _flagprocessors)

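# A minimal sketch of a registration under the invariants above, using a
# hypothetical REVIDX_EXAMPLE flag (it would first have to be added to
# REVIDX_FLAGS_ORDER and REVIDX_KNOWN_FLAGS for this call to succeed):
#
#     def examplereadprocessor(rl, rawtext):
#         # stored form -> user-visible form; True: the returned text can
#         # be checked against the recorded hash
#         return rawtext[len(b'stamp:'):], True
#
#     def examplewriteprocessor(rl, text):
#         # user-visible form -> stored form; False: hash the user text,
#         # not the stamped rawtext
#         return b'stamp:' + text, False
#
#     def examplerawprocessor(rl, rawtext):
#         # same boolean as "write": rawtext alone cannot be hash-checked
#         return False
#
#     addflagprocessor(REVIDX_EXAMPLE, (examplereadprocessor,
#                                       examplewriteprocessor,
#                                       examplerawprocessor))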
def _insertflagprocessor(flag, processor, flagprocessors):
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise error.ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise error.ProgrammingError(msg)
    if flag in flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    flagprocessors[flag] = processor

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

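# The helpers above pack the data-file offset and the 16 flag bits into the
# first index field: the offset lives in the high bits, the flags in the low
# 16 bits. A quick round trip (the offset value is arbitrary):
#
#     packed = offset_type(1024, REVIDX_ISCENSORED)
#     assert getoffset(packed) == 1024
#     assert gettype(packed) == REVIDX_ISCENSORED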
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated with the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

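# Sanity check on the layout above: four big-endian longs plus three 20-byte
# node fields give a fixed 76-byte record.
#
#     assert indexformatv0.size == 4 * 4 + 3 * 20 == 76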
class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

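# In the struct above, the 6-byte offset and 2-byte flags are stored together
# as the single ">Q" field (see offset_type()), and the 32-byte nodeid slot
# is a 20-byte hash padded with 12 zero bytes ("20s12x"), for a fixed 64-byte
# record:
#
#     assert indexformatng.size == 8 + 6 * 4 + 20 + 12 == 64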
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.
    """
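    # A hedged usage sketch of the object described above (the opener and
    # index file name are hypothetical; they are supplied by the store):
    #
    #     rl = revlog(opener, indexfile=b'data/foo.txt.i')
    #     len(rl)            # number of revisions in the index
    #     rl.tip()           # node of the most recently appended revision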
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False,
                 upperboundcomp=None):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        """
        self.upperboundcomp = upperboundcomp
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(_flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()

    def _loadindex(self):
        mmapindexthreshold = None
        opts = getattr(self.opener, 'options', {}) or {}

        if 'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif 'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if 'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif getattr(self.opener, 'options', None) is not None:
            # If options provided but no 'revlog*' found, the repository
            # would have no 'requires' file in it, which means we have to
            # stick to the old format.
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        if 'chunkcachesize' in opts:
            self._chunkcachesize = opts['chunkcachesize']
        if 'maxchainlen' in opts:
            self._maxchainlen = opts['maxchainlen']
        if 'deltabothparents' in opts:
            self._deltabothparents = opts['deltabothparents']
        self._lazydelta = bool(opts.get('lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
        if 'compengine' in opts:
            self._compengine = opts['compengine']
        if 'zlib.level' in opts:
            self._compengineopts['zlib.level'] = opts['zlib.level']
        if 'zstd.level' in opts:
            self._compengineopts['zstd.level'] = opts['zstd.level']
        if 'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts['maxdeltachainspan']
        if self._mmaplargeindex and 'mmapindexthreshold' in opts:
            mmapindexthreshold = opts['mmapindexthreshold']
        self._sparserevlog = bool(opts.get('sparse-revlog', False))
        withsparseread = bool(opts.get('with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if 'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts['sparse-read-density-threshold']
        if 'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts['sparse-read-min-gap-size']
        if opts.get('enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
            _insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(_('revlog chunk cache size %r is not '
                                      'greater than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(_('revlog chunk cache size %r is not a '
                                      'power of 2') % self._chunkcachesize)
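        # The test above relies on a bit trick: (x & (x - 1)) clears the
        # lowest set bit of x, so it is zero exactly when x has a single bit
        # set, i.e. when x is a power of two. For example,
        # 65536 & 65535 == 0, while 65537 & 65536 != 0.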

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                        self.opener.fstat(f).st_size >= mmapindexthreshold):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF

        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                    (fmt, self.indexfile))
        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(_("index %s is corrupted") %
                                    self.indexfile)
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7] # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
                or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

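    # Note on candelta() above: REVIDX_EXTSTORED, used by the LFS extension,
    # is one of the REVIDX_RAWTEXT_CHANGING_FLAGS members; its rawtext is
    # only a pointer to externally stored content, so a delta computed
    # against it would not describe the real data.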
    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            # If we are using the native C version, you are in a fun case
            # where self.index, self.nodemap and self._nodecaches are the
            # same object.
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

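    # Concrete reading of _deltachain() above (revision numbers made up): if
    # revision 3 is stored as a full snapshot and revisions 4 and 5 delta on
    # top of it, _deltachain(5) returns ([3, 4, 5], False), while
    # _deltachain(5, stoprev=4) returns ([5], True): the walk hit ``stoprev``
    # before reaching the chain base, and ``stoprev`` itself is excluded.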
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        elif util.safehasattr(parsers, 'rustlazyancestors'):
            lazyancestors = ancestor.rustlazyancestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.
1055
1057
1056 # If one of the roots was nullid, everything is included anyway.
1058 # If one of the roots was nullid, everything is included anyway.
1057 if lowestrev > nullrev:
1059 if lowestrev > nullrev:
1058 # But, since we weren't, let's recompute the lowest rev to not
1060 # But, since we weren't, let's recompute the lowest rev to not
1059 # include roots that aren't ancestors.
1061 # include roots that aren't ancestors.
1060
1062
1061 # Filter out roots that aren't ancestors of heads
1063 # Filter out roots that aren't ancestors of heads
1062 roots = [root for root in roots if root in ancestors]
1064 roots = [root for root in roots if root in ancestors]
1063 # Recompute the lowest revision
1065 # Recompute the lowest revision
1064 if roots:
1066 if roots:
1065 lowestrev = min([self.rev(root) for root in roots])
1067 lowestrev = min([self.rev(root) for root in roots])
1066 else:
1068 else:
1067 # No more roots? Return empty list
1069 # No more roots? Return empty list
1068 return nonodes
1070 return nonodes
1069 else:
1071 else:
1070 # We are descending from nullid, and don't need to care about
1072 # We are descending from nullid, and don't need to care about
1071 # any other roots.
1073 # any other roots.
1072 lowestrev = nullrev
1074 lowestrev = nullrev
1073 roots = [nullid]
1075 roots = [nullid]
1074 # Transform our roots list into a set.
1076 # Transform our roots list into a set.
1075 descendants = set(roots)
1077 descendants = set(roots)
1076 # Also, keep the original roots so we can filter out roots that aren't
1078 # Also, keep the original roots so we can filter out roots that aren't
1077 # 'real' roots (i.e. are descended from other roots).
1079 # 'real' roots (i.e. are descended from other roots).
1078 roots = descendants.copy()
1080 roots = descendants.copy()
1079 # Our topologically sorted list of output nodes.
1081 # Our topologically sorted list of output nodes.
1080 orderedout = []
1082 orderedout = []
1081 # Don't start at nullid since we don't want nullid in our output list,
1083 # Don't start at nullid since we don't want nullid in our output list,
1082 # and if nullid shows up in descendants, empty parents will look like
1084 # and if nullid shows up in descendants, empty parents will look like
1083 # they're descendants.
1085 # they're descendants.
1084 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1086 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1085 n = self.node(r)
1087 n = self.node(r)
1086 isdescendant = False
1088 isdescendant = False
1087 if lowestrev == nullrev: # Everybody is a descendant of nullid
1089 if lowestrev == nullrev: # Everybody is a descendant of nullid
1088 isdescendant = True
1090 isdescendant = True
1089 elif n in descendants:
1091 elif n in descendants:
1090 # n is already a descendant
1092 # n is already a descendant
1091 isdescendant = True
1093 isdescendant = True
1092 # This check only needs to be done here because all the roots
1094 # This check only needs to be done here because all the roots
1093 # will start being marked is descendants before the loop.
1095 # will start being marked is descendants before the loop.
1094 if n in roots:
1096 if n in roots:
1095 # If n was a root, check if it's a 'real' root.
1097 # If n was a root, check if it's a 'real' root.
1096 p = tuple(self.parents(n))
1098 p = tuple(self.parents(n))
1097 # If any of its parents are descendants, it's not a root.
1099 # If any of its parents are descendants, it's not a root.
1098 if (p[0] in descendants) or (p[1] in descendants):
1100 if (p[0] in descendants) or (p[1] in descendants):
1099 roots.remove(n)
1101 roots.remove(n)
1100 else:
1102 else:
1101 p = tuple(self.parents(n))
1103 p = tuple(self.parents(n))
1102 # A node is a descendant if either of its parents are
1104 # A node is a descendant if either of its parents are
1103 # descendants. (We seeded the dependents list with the roots
1105 # descendants. (We seeded the dependents list with the roots
1104 # up there, remember?)
1106 # up there, remember?)
1105 if (p[0] in descendants) or (p[1] in descendants):
1107 if (p[0] in descendants) or (p[1] in descendants):
1106 descendants.add(n)
1108 descendants.add(n)
1107 isdescendant = True
1109 isdescendant = True
1108 if isdescendant and ((ancestors is None) or (n in ancestors)):
1110 if isdescendant and ((ancestors is None) or (n in ancestors)):
1109 # Only include nodes that are both descendants and ancestors.
1111 # Only include nodes that are both descendants and ancestors.
1110 orderedout.append(n)
1112 orderedout.append(n)
1111 if (ancestors is not None) and (n in heads):
1113 if (ancestors is not None) and (n in heads):
1112 # We're trying to figure out which heads are reachable
1114 # We're trying to figure out which heads are reachable
1113 # from roots.
1115 # from roots.
1114 # Mark this head as having been reached
1116 # Mark this head as having been reached
1115 heads[n] = True
1117 heads[n] = True
1116 elif ancestors is None:
1118 elif ancestors is None:
1117 # Otherwise, we're trying to discover the heads.
1119 # Otherwise, we're trying to discover the heads.
1118 # Assume this is a head because if it isn't, the next step
1120 # Assume this is a head because if it isn't, the next step
1119 # will eventually remove it.
1121 # will eventually remove it.
1120 heads[n] = True
1122 heads[n] = True
1121 # But, obviously its parents aren't.
1123 # But, obviously its parents aren't.
1122 for p in self.parents(n):
1124 for p in self.parents(n):
1123 heads.pop(p, None)
1125 heads.pop(p, None)
1124 heads = [head for head, flag in heads.iteritems() if flag]
1126 heads = [head for head, flag in heads.iteritems() if flag]
1125 roots = list(roots)
1127 roots = list(roots)
1126 assert orderedout
1128 assert orderedout
1127 assert roots
1129 assert roots
1128 assert heads
1130 assert heads
1129 return (orderedout, roots, heads)
1131 return (orderedout, roots, heads)
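
    # Illustrative sketch (hypothetical nodes): for a linear history
    # R0 <- R1 <- R2, rl.nodesbetween(roots=[R0], heads=[R2]) is expected
    # to return ([R0, R1, R2], [R0], [R2]): the topological path plus the
    # roots and heads that were actually reachable.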

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]
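
    # Worked example of the marking pass above (hypothetical DAG): with
    # revisions 0 <- 1 <- 2 and a branch 0 <- 3, every rev first sets
    # ishead[r] = 1, then zeroes its parents' slots; 0 and 1 get cleared,
    # so the surviving entries [2, 3] are exactly the heads.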

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
                                    stoprevs=stoprevs)

        return [self.node(rev) for rev in revs]
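
    # Usage sketch (hypothetical nodes N1, N2): rl.heads() lists every
    # childless node, rl.heads(start=N1) keeps only heads descending from
    # N1, and rl.heads(stop=[N2]) additionally cuts the walk at N2 as if
    # it had no children.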

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))
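
    # Quick sketch of the shortcuts above (hypothetical linear DAG
    # 0 <- 1 <- 2): isancestorrev(0, 2) falls through to reachableroots,
    # isancestorrev(2, 2) is True by self-ancestry, and isancestorrev(2, 0)
    # is False immediately since an ancestor can never have a higher
    # revision number than its descendant.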

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::<roots> and <roots>::<heads>))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(minroot, heads, roots,
                                              includepath)
        except AttributeError:
            return dagop._reachablerootspure(self.parentrevs,
                                             minroot, roots, heads, includepath)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid
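
    # Note on the tie-break above (hypothetical values): if index.ancestors()
    # returns two equally good revisions whose node hashes start with 0x1a...
    # and 0x9f..., min() always picks 0x1a..., so repeated merges resolve to
    # the same "best" ancestor deterministically.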

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))
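
    # Usage sketch (hypothetical hex id H of an existing node):
    #
    #   rl.lookup(0)        # node of revision 0
    #   rl.lookup(b'0')     # same, resolved via str(rev)
    #   rl.lookup(bin(H))   # binary nodeid, returned as-is
    #   rl.lookup(H[:6])    # resolved through an unambiguous hex prefix
    #
    # Anything unresolvable raises error.LookupError('no match found').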

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)
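
    # Sketch (hypothetical node whose hex form starts with 'a1b2'): the
    # fallback loop probes 'a', 'a1', 'a1b', ... until _partialmatch()
    # stops reporting ambiguity, then disambiguate() lengthens the prefix
    # just enough to rule out the all-'f' working-directory pseudo-id.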

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, its seek position will be
        changed and NOT restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _('partial read of revlog %s; expected %d bytes from '
                      'offset %d, got %d') %
                    (self.indexfile if self._inline else self.datafile,
                     length, realoffset, len(d) - startoffset))

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _('partial read of revlog %s; expected %d bytes from offset '
                  '%d, got %d') %
                (self.indexfile if self._inline else self.datafile,
                 length, offset, len(d)))

        return d
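
    # Worked example of the window arithmetic above (assuming the default
    # chunk cache size of 65536 bytes): a request for offset=70000,
    # length=1000 aligns down to realoffset = 70000 & ~65535 = 65536 and
    # reads up to (70000 + 1000 + 65536) & ~65535 = 131072, so
    # reallength = 65536 and neighbouring requests hit the cached window.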

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)
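
    # Worked sketch of the inline adjustment above: in an inline revlog,
    # index entries (self._io.size bytes each; 64 in the v1 format) are
    # interleaved with the data, so revision r's data is shifted by
    # (r + 1) entry sizes -- e.g. rev 0's data starts at byte 64, not 0.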

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1
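
    # Sketch of the three cases above (hypothetical rev 5 whose index base
    # is 3): base == rev would mean a full snapshot with no delta parent;
    # with generaldelta the stored base 3 *is* the delta parent; without
    # generaldelta deltas always chain to the previous revision, here 4.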

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)
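
    # Sketch of the sparse-revlog recursion above: a revision whose delta
    # base is neither parent may still be an intermediate snapshot, so the
    # question is delegated to the base; the chain bottoms out at a full
    # snapshot (base == rev or base == nullrev) or at a plain delta.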

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        return self._revisiondata(nodeorrev, _df, raw=raw)

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._revisioncache:
            if self._revisioncache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._revisioncache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._revisioncache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._revisioncache[2]

            cachedrev = self._revisioncache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._revisioncache[2]

            # drop cache to save memory
            self._revisioncache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._revisioncache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

    def rawdata(self, nodeorrev, _df=None, raw=False):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash
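
    # Minimal sketch of a flag processor, as consumed by the loop above
    # (hypothetical extension code; the registration API varies between
    # releases):
    #
    #   def readtransform(rl, text):
    #       return decode(text), True    # text for consumers, verify hash
    #   def writetransform(rl, text):
    #       return encode(text), False   # raw text to store, skip check
    #   def rawtransform(rl, rawtext):
    #       return False                 # whether rawtext is hash-valid
    #
    # The 3-tuple (readtransform, writetransform, rawtransform) is exactly
    # what self._flagprocessors[flag] unpacks for each flag that is set.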

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise
1829
1831
    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp('r') as ifh, self._datafp('w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

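    # Illustrative sketch of the size test performed above, kept separate for
    # clarity. The real threshold lives in the module-level _maxinline; the
    # 128 KiB default here is an assumption for the example.
    @staticmethod
    def _sketch_needssplit(inline, tipstart, tiplength, maxinline=131072):
        """Return True when an inline revlog has outgrown the inline format.

        tipstart and tiplength mirror start(tiprev) and length(tiprev):
        together they give the end offset of the last revision's data.
        """
        return inline and (tipstart + tiplength) >= maxinline
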
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.
        """

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

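    # A toy sketch of the 'write' direction of flag processing used above.
    # The registry shape and names here are made up for illustration and are
    # not the real flag-processor API: each set flag bit maps to a transform
    # that turns user-visible text into stored rawtext and reports whether
    # the node hash remains checkable afterwards.
    @staticmethod
    def _sketch_processwrite(text, flags, processors):
        """processors: {flagbit: transform}; transform(text) -> (rawtext, vhash)."""
        validatehash = True
        for bit in sorted(processors):
            if flags & bit:
                text, vhash = processors[bit](text)
                validatehash = validatehash and vhash
        return text, validatehash
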
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

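    # A standalone sketch of the header convention compress()/decompress()
    # implement above: zlib output conveniently starts with 'x', raw data
    # beginning with NUL is stored untouched, and everything else gets a 'u'
    # prefix. The profitability test here (a length comparison) is a
    # simplification; the real compressor decides that itself.
    @staticmethod
    def _sketch_chunkroundtrip(data):
        """Frame data like a revlog chunk, then decode it back."""
        import zlib
        compressed = zlib.compress(data)
        if len(compressed) < len(data):
            stored = compressed            # zlib stream: first byte is 'x'
        elif data[0:1] == b'\0':
            stored = data                  # cannot be mistaken for 'x'/'u'
        else:
            stored = b'u' + data           # tagged uncompressed
        t = stored[0:1]
        if t == b'x':
            restored = zlib.decompress(stored)
        elif t == b'\0':
            restored = stored
        else:
            restored = stored[1:]
        assert restored == data
        return stored
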
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before it is changed by flag processors, which
            # is the non-raw size. use revlog explicitly to avoid filelog's
            # extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

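    # Illustrative sketch of how the first field of the index tuple built
    # above packs two values into one integer, mirroring the module-level
    # offset_type() helper: the data offset occupies the high bits and the
    # 16-bit flag word the low bits.
    @staticmethod
    def _sketch_offsettype(offset, flags):
        """Pack (offset, flags) into one int and verify it unpacks cleanly."""
        packed = (offset << 16) | (flags & 0xffff)
        assert packed >> 16 == offset and packed & 0xffff == flags & 0xffff
        return packed
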
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

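    # A stdlib-only sketch of the defensive pattern described above: when a
    # handle opened in append mode is shared between reads and writes,
    # explicitly seek to EOF before every write rather than trusting the
    # platform to restore the append position.
    @staticmethod
    def _sketch_appendsafely(path, payload):
        """Append payload to path even after the handle was used for reads."""
        import os
        with open(path, 'ab+') as fh:
            fh.seek(0)
            fh.read(1)               # simulate an earlier read moving the cursor
            fh.seek(0, os.SEEK_END)  # re-establish the append position
            fh.write(payload)
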
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError('cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes

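    # A simplified sketch of the consumption model above, with made-up
    # container shapes (not the changegroup wire format): each group entry
    # names its delta base explicitly, so texts are materialized by patching
    # a base that was either known beforehand or produced earlier in the
    # same group.
    @staticmethod
    def _sketch_applygroup(texts, group, patch):
        """texts: {node: text}; group: [(node, basenode, delta)];
        patch(basetext, delta) -> text. Mutates and returns texts."""
        for node, basenode, delta in group:
            if node in texts:
                continue    # duplicate, e.g. the same change on two branches
            texts[node] = patch(texts[basenode], delta)
        return texts
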
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                            self.headrevs(),
                                            self.linkrev, self.parentrevs)

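    # A simplified sketch of the computation delegated to storageutil above,
    # ignoring the ancestry walk the real helper performs: the strip point is
    # the first revision whose linkrev is >= minlink, and the "broken" set is
    # every later revision whose own linkrev is still below minlink (those
    # are lost by the truncation even though their changesets survive).
    @staticmethod
    def _sketch_strippoint(linkrevs, minlink):
        """linkrevs: sequence indexed by rev. Returns (striprev, brokenset)."""
        striprev = len(linkrevs)
        for rev, linkrev in enumerate(linkrevs):
            if linkrev >= minlink:
                striprev = rev
                break
        broken = set(rev for rev in range(striprev, len(linkrevs))
                     if linkrevs[rev] < minlink)
        return striprev, broken
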
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

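    # Illustrative restatement of the two invariants checksize() verifies for
    # a non-inline revlog: the data file must end exactly where the last
    # revision's data ends, and the index length must be a whole number of
    # fixed-size entries.
    @staticmethod
    def _sketch_sizecheck(datasize, indexsize, expectedend, entrysize):
        """Return (dd, di); a healthy pair of files yields (0, 0)."""
        dd = datasize - expectedend    # surplus or missing bytes of data
        di = indexsize % entrysize     # bytes of a truncated trailing entry
        return dd, di
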
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False,
                      deltamode=repository.CG_DELTAMODE_STD):
        if nodesorder not in ('nodes', 'storage', 'linear', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        if (not self._storedeltachains and
                deltamode != repository.CG_DELTAMODE_PREV):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self, nodes, nodesorder, revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions)

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

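    # Illustrative mapping, extracted for readability, of how clone() below
    # translates the three basic reuse policies into the destination's two
    # laziness switches; 'fulladd' takes a different code path entirely.
    @staticmethod
    def _sketch_reusepolicy(policy):
        """Return (lazydelta, lazydeltabase) for a basic deltareuse policy."""
        return {
            'always':   (True, True),    # reuse any stored delta as-is
            'samerevs': (True, False),   # reuse only against the same base
            'never':    (False, False),  # always recompute from scratch
        }[policy]
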
    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. If it is None, the destination revlog's current setting is
        kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if (deltareuse != self.DELTAREUSEFULLADD
                        and destrevlog._lazydelta):
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile,
                       censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                     p1, p2, censornode, REVIDX_ISCENSORED)

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(_('censored revision stored as delta; '
                                        'cannot censor'),
                                      hint=_('censoring of revlogs is not '
                                             'fully implemented; please report '
                                             'this bug'))
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(_('cannot censor due to censored '
                                        'revision having delta stored'))
                rawtext = self._chunk(rev)
            else:
                rawtext = self.revision(rev, raw=True)

            newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                                 self.flags(rev))

        tr.addbackup(self.indexfile, location='store')
        if not self._inline:
            tr.addbackup(self.datafile, location='store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

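    # A stdlib-only sketch of the copy-and-replace strategy described above:
    # write the rewritten content to a sibling temporary file, then rename it
    # over the original so readers never observe a half-rewritten file. The
    # transform callable stands in for the per-revision censoring logic.
    @staticmethod
    def _sketch_rewriteinplace(path, transform):
        """Atomically replace path with transform applied to its bytes."""
        import os
        tmppath = path + '.tmpcensored'
        with open(path, 'rb') as src, open(tmppath, 'wb') as dst:
            dst.write(transform(src.read()))
        os.rename(tmppath, path)   # atomic within one filesystem on POSIX
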
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_('data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_('index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state['expectedversion']:
            yield revlogproblem(
                warning=_("warning: '%s' uses revlog format %d; expected %d") %
                        (self.indexfile, version, state['expectedversion']))

        state['skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "revision(rev, raw=True)". "text"
            # mentioned below is "revision(rev, raw=False)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see revlog.addflagprocessor).
            #
            #              | common  | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1      | L1     | L1    | L1
            # size()       | L1      | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2      | L2     | L2    | L2
            # len(text)    | L2      | L2     | L2    | L3
            # len(read())  | L2      | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.revision(node, raw=True))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)

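    # Illustrative sketch of the reporting style used above: verification is
    # a generator that yields problem records instead of raising, so a caller
    # can keep scanning past the first fault and decide severity itself.
    @staticmethod
    def _sketch_checklengths(sizes):
        """sizes: iterable of (rev, indexedlen, storedlen); yields faults."""
        for rev, l1, l2 in sizes:
            if l1 != l2:
                yield (rev, 'unpacked size is %d, %d expected' % (l2, l1))
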
    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d