revlog: fix index computation during inline->non-inline transition...
Joerg Sonnenberger
r48064:21ed126b default
@@ -0,0 +1,5 @@
import os


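# Hook that aborts the process with a distinctive exit code so the test
# below can tell that the transaction was interrupted at exactly this point.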
def killme(ui, repo, hooktype, **kwargs):
    os._exit(80)
@@ -0,0 +1,35 @@
Test correctness of revlog inline -> non-inline transition
----------------------------------------------------------

Test that offset computation correctly factors in the index entries
themselves. The test repo has one small, one moderate, and one big change.
The clone has the small and moderate changes and will transition to
non-inline storage when adding the big change (a sketch of the offset
arithmetic follows this test).

  $ hg init troffset-computation --config format.revlog-compression=none
  $ cd troffset-computation
  $ printf '% 20d' '1' > file
  $ hg commit -Aqm_
  $ printf '% 1024d' '1' > file
  $ hg commit -Aqm_
  $ dd if=/dev/zero of=file bs=1k count=128 > /dev/null 2>&1
  $ hg commit -Aqm_
  $ cd ..

  $ hg clone -r 1 troffset-computation troffset-computation-copy --config format.revlog-compression=none -q
  $ cd troffset-computation-copy
  $ cat > .hg/hgrc <<EOF
  > [hooks]
  > pretxnchangegroup = python:$TESTDIR/helper-killhook.py:killme
  > EOF
#if chg
  $ hg pull ../troffset-computation
  pulling from ../troffset-computation
  [255]
#else
  $ hg pull ../troffset-computation
  pulling from ../troffset-computation
  [80]
#endif
  $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file | tail -1
  data/file.i 128
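For context, here is a minimal sketch of the offset arithmetic this test
exercises (illustrative Python, not Mercurial's actual implementation; it
assumes the 64-byte entry size of a revlogv1 index). In an inline revlog
the data chunks live in the .i file interleaved with the index entries, so
a chunk's physical position includes every index entry written up to and
including its own; once the revlog transitions to a separate .d file, only
data bytes count:

    INDEX_ENTRY_SIZE = 64  # bytes per revlogv1 index entry

    def physical_offset_inline(data_offset, rev):
        # Inline: rev's chunk sits after its own index entry and all
        # earlier entries, in addition to all earlier data bytes.
        return data_offset + (rev + 1) * INDEX_ENTRY_SIZE

    def physical_offset_split(data_offset, rev):
        # Non-inline: the .d file holds data only.
        return data_offset

The final journal check above is consistent with this: 128 bytes is exactly
two 64-byte index entries with no inline data left in file.i.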
@@ -1,3454 +1,3454 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    revlogv0,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)


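# A quick illustration (added for exposition, not part of the original file)
# of the packing contract above, matching the retrieval rules documented in
# the revlog class docstring:
#
#   packed = offset_type(4096, 0)
#   assert packed >> 16 == 4096     # "offset = field >> 16"
#   assert (packed & 0xFFFF) == 0   # "flags = field & 0xFFFF"

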
def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    assert not inline
    from .pure.parsers import parse_index_cl_v2

    index, cache = parse_index_cl_v2(data)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).


    Internal details
    ----------------

    A large part of the revlog logic deals with revisions' "index entries", tuple
    objects that contain the same "items" whatever the revlog version.
    Different versions will have different ways of storing these items (sometimes
    not having them at all), but the tuple will always be the same. New fields
    are usually added at the end to avoid breaking existing code that relies
    on the existing order. The fields are defined as follows:

    [0] offset:
        The byte index of the start of revision data chunk.
        That value is shifted up by 16 bits. Use "offset = field >> 16" to
        retrieve it.

        flags:
            A flag field that carries special information or changes the behavior
            of the revision. (see `REVIDX_*` constants for details)
            The flag field only occupies the first 16 bits of this field,
            use "flags = field & 0xFFFF" to retrieve the value.

    [1] compressed length:
        The size, in bytes, of the chunk on disk

    [2] uncompressed length:
        The size, in bytes, of the full revision once reconstructed.

    [3] base rev:
        Either the base of the revision delta chain (without general
        delta), or the base of the delta (stored in the data chunk)
        with general delta.

    [4] link rev:
        Changelog revision number of the changeset introducing this
        revision.

    [5] parent 1 rev:
        Revision number of the first parent

    [6] parent 2 rev:
        Revision number of the second parent

    [7] node id:
        The node id of the current revision

    [8] sidedata offset:
        The byte index of the start of the revision's side-data chunk.

    [9] sidedata chunk length:
        The size, in bytes, of the revision's side-data chunk.

    [10] data compression mode:
        two bits that detail the way the data chunk is compressed on disk.
        (see "COMP_MODE_*" constants for details). For revlog version 0 and
        1 this will always be COMP_MODE_INLINE.

    [11] side-data compression mode:
        two bits that detail the way the sidedata chunk is compressed on disk.
        (see "COMP_MODE_*" constants for details)
    """
363
363
364 _flagserrorclass = error.RevlogError
364 _flagserrorclass = error.RevlogError
365
365
366 def __init__(
366 def __init__(
367 self,
367 self,
368 opener,
368 opener,
369 target,
369 target,
370 radix,
370 radix,
371 postfix=None, # only exist for `tmpcensored` now
371 postfix=None, # only exist for `tmpcensored` now
372 checkambig=False,
372 checkambig=False,
373 mmaplargeindex=False,
373 mmaplargeindex=False,
374 censorable=False,
374 censorable=False,
375 upperboundcomp=None,
375 upperboundcomp=None,
376 persistentnodemap=False,
376 persistentnodemap=False,
377 concurrencychecker=None,
377 concurrencychecker=None,
378 trypending=False,
378 trypending=False,
379 ):
379 ):
380 """
380 """
381 create a revlog object
381 create a revlog object
382
382
383 opener is a function that abstracts the file opening operation
383 opener is a function that abstracts the file opening operation
384 and can be used to implement COW semantics or the like.
384 and can be used to implement COW semantics or the like.
385
385
386 `target`: a (KIND, ID) tuple that identify the content stored in
386 `target`: a (KIND, ID) tuple that identify the content stored in
387 this revlog. It help the rest of the code to understand what the revlog
387 this revlog. It help the rest of the code to understand what the revlog
388 is about without having to resort to heuristic and index filename
388 is about without having to resort to heuristic and index filename
389 analysis. Note: that this must be reliably be set by normal code, but
389 analysis. Note: that this must be reliably be set by normal code, but
390 that test, debug, or performance measurement code might not set this to
390 that test, debug, or performance measurement code might not set this to
391 accurate value.
391 accurate value.
392 """
392 """
393 self.upperboundcomp = upperboundcomp
393 self.upperboundcomp = upperboundcomp
394
394
395 self.radix = radix
395 self.radix = radix
396
396
397 self._docket_file = None
397 self._docket_file = None
398 self._indexfile = None
398 self._indexfile = None
399 self._datafile = None
399 self._datafile = None
400 self._nodemap_file = None
400 self._nodemap_file = None
401 self.postfix = postfix
401 self.postfix = postfix
402 self._trypending = trypending
402 self._trypending = trypending
403 self.opener = opener
403 self.opener = opener
404 if persistentnodemap:
404 if persistentnodemap:
405 self._nodemap_file = nodemaputil.get_nodemap_file(self)
405 self._nodemap_file = nodemaputil.get_nodemap_file(self)
406
406
407 assert target[0] in ALL_KINDS
407 assert target[0] in ALL_KINDS
408 assert len(target) == 2
408 assert len(target) == 2
409 self.target = target
409 self.target = target
410 # When True, indexfile is opened with checkambig=True at writing, to
410 # When True, indexfile is opened with checkambig=True at writing, to
411 # avoid file stat ambiguity.
411 # avoid file stat ambiguity.
412 self._checkambig = checkambig
412 self._checkambig = checkambig
413 self._mmaplargeindex = mmaplargeindex
413 self._mmaplargeindex = mmaplargeindex
414 self._censorable = censorable
414 self._censorable = censorable
415 # 3-tuple of (node, rev, text) for a raw revision.
415 # 3-tuple of (node, rev, text) for a raw revision.
416 self._revisioncache = None
416 self._revisioncache = None
417 # Maps rev to chain base rev.
417 # Maps rev to chain base rev.
418 self._chainbasecache = util.lrucachedict(100)
418 self._chainbasecache = util.lrucachedict(100)
419 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
419 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
420 self._chunkcache = (0, b'')
420 self._chunkcache = (0, b'')
421 # How much data to read and cache into the raw revlog data cache.
421 # How much data to read and cache into the raw revlog data cache.
422 self._chunkcachesize = 65536
422 self._chunkcachesize = 65536
423 self._maxchainlen = None
423 self._maxchainlen = None
424 self._deltabothparents = True
424 self._deltabothparents = True
425 self.index = None
425 self.index = None
426 self._docket = None
426 self._docket = None
427 self._nodemap_docket = None
427 self._nodemap_docket = None
428 # Mapping of partial identifiers to full nodes.
428 # Mapping of partial identifiers to full nodes.
429 self._pcache = {}
429 self._pcache = {}
430 # Mapping of revision integer to full node.
430 # Mapping of revision integer to full node.
431 self._compengine = b'zlib'
431 self._compengine = b'zlib'
432 self._compengineopts = {}
432 self._compengineopts = {}
433 self._maxdeltachainspan = -1
433 self._maxdeltachainspan = -1
434 self._withsparseread = False
434 self._withsparseread = False
435 self._sparserevlog = False
435 self._sparserevlog = False
436 self.hassidedata = False
436 self.hassidedata = False
437 self._srdensitythreshold = 0.50
437 self._srdensitythreshold = 0.50
438 self._srmingapsize = 262144
438 self._srmingapsize = 262144
439
439
440 # Make copy of flag processors so each revlog instance can support
440 # Make copy of flag processors so each revlog instance can support
441 # custom flags.
441 # custom flags.
442 self._flagprocessors = dict(flagutil.flagprocessors)
442 self._flagprocessors = dict(flagutil.flagprocessors)
443
443
444 # 2-tuple of file handles being used for active writing.
444 # 2-tuple of file handles being used for active writing.
445 self._writinghandles = None
445 self._writinghandles = None
446 # prevent nesting of addgroup
446 # prevent nesting of addgroup
447 self._adding_group = None
447 self._adding_group = None
448
448
449 self._loadindex()
449 self._loadindex()
450
450
451 self._concurrencychecker = concurrencychecker
451 self._concurrencychecker = concurrencychecker
452
452
453 def _init_opts(self):
453 def _init_opts(self):
454 """process options (from above/config) to setup associated default revlog mode
454 """process options (from above/config) to setup associated default revlog mode
455
455
456 These values might be affected when actually reading on disk information.
456 These values might be affected when actually reading on disk information.
457
457
458 The relevant values are returned for use in _loadindex().
458 The relevant values are returned for use in _loadindex().
459
459
460 * newversionflags:
460 * newversionflags:
461 version header to use if we need to create a new revlog
461 version header to use if we need to create a new revlog
462
462
463 * mmapindexthreshold:
463 * mmapindexthreshold:
464 minimal index size for start to use mmap
464 minimal index size for start to use mmap
465
465
466 * force_nodemap:
466 * force_nodemap:
467 force the usage of a "development" version of the nodemap code
467 force the usage of a "development" version of the nodemap code
468 """
468 """
469 mmapindexthreshold = None
469 mmapindexthreshold = None
470 opts = self.opener.options
470 opts = self.opener.options
471
471
472 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
472 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
473 new_header = CHANGELOGV2
473 new_header = CHANGELOGV2
474 elif b'revlogv2' in opts:
474 elif b'revlogv2' in opts:
475 new_header = REVLOGV2
475 new_header = REVLOGV2
476 elif b'revlogv1' in opts:
476 elif b'revlogv1' in opts:
477 new_header = REVLOGV1 | FLAG_INLINE_DATA
477 new_header = REVLOGV1 | FLAG_INLINE_DATA
478 if b'generaldelta' in opts:
478 if b'generaldelta' in opts:
479 new_header |= FLAG_GENERALDELTA
479 new_header |= FLAG_GENERALDELTA
480 elif b'revlogv0' in self.opener.options:
480 elif b'revlogv0' in self.opener.options:
481 new_header = REVLOGV0
481 new_header = REVLOGV0
482 else:
482 else:
483 new_header = REVLOG_DEFAULT_VERSION
483 new_header = REVLOG_DEFAULT_VERSION
484
484
485 if b'chunkcachesize' in opts:
485 if b'chunkcachesize' in opts:
486 self._chunkcachesize = opts[b'chunkcachesize']
486 self._chunkcachesize = opts[b'chunkcachesize']
487 if b'maxchainlen' in opts:
487 if b'maxchainlen' in opts:
488 self._maxchainlen = opts[b'maxchainlen']
488 self._maxchainlen = opts[b'maxchainlen']
489 if b'deltabothparents' in opts:
489 if b'deltabothparents' in opts:
490 self._deltabothparents = opts[b'deltabothparents']
490 self._deltabothparents = opts[b'deltabothparents']
491 self._lazydelta = bool(opts.get(b'lazydelta', True))
491 self._lazydelta = bool(opts.get(b'lazydelta', True))
492 self._lazydeltabase = False
492 self._lazydeltabase = False
493 if self._lazydelta:
493 if self._lazydelta:
494 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
494 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
495 if b'compengine' in opts:
495 if b'compengine' in opts:
496 self._compengine = opts[b'compengine']
496 self._compengine = opts[b'compengine']
497 if b'zlib.level' in opts:
497 if b'zlib.level' in opts:
498 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
498 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
499 if b'zstd.level' in opts:
499 if b'zstd.level' in opts:
500 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
500 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
501 if b'maxdeltachainspan' in opts:
501 if b'maxdeltachainspan' in opts:
502 self._maxdeltachainspan = opts[b'maxdeltachainspan']
502 self._maxdeltachainspan = opts[b'maxdeltachainspan']
503 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
503 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
504 mmapindexthreshold = opts[b'mmapindexthreshold']
504 mmapindexthreshold = opts[b'mmapindexthreshold']
505 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
505 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
506 withsparseread = bool(opts.get(b'with-sparse-read', False))
506 withsparseread = bool(opts.get(b'with-sparse-read', False))
507 # sparse-revlog forces sparse-read
507 # sparse-revlog forces sparse-read
508 self._withsparseread = self._sparserevlog or withsparseread
508 self._withsparseread = self._sparserevlog or withsparseread
509 if b'sparse-read-density-threshold' in opts:
509 if b'sparse-read-density-threshold' in opts:
510 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
510 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
511 if b'sparse-read-min-gap-size' in opts:
511 if b'sparse-read-min-gap-size' in opts:
512 self._srmingapsize = opts[b'sparse-read-min-gap-size']
512 self._srmingapsize = opts[b'sparse-read-min-gap-size']
513 if opts.get(b'enableellipsis'):
513 if opts.get(b'enableellipsis'):
514 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
514 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
515
515
516 # revlog v0 doesn't have flag processors
516 # revlog v0 doesn't have flag processors
517 for flag, processor in pycompat.iteritems(
517 for flag, processor in pycompat.iteritems(
518 opts.get(b'flagprocessors', {})
518 opts.get(b'flagprocessors', {})
519 ):
519 ):
520 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
520 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
521
521
522 if self._chunkcachesize <= 0:
522 if self._chunkcachesize <= 0:
523 raise error.RevlogError(
523 raise error.RevlogError(
524 _(b'revlog chunk cache size %r is not greater than 0')
524 _(b'revlog chunk cache size %r is not greater than 0')
525 % self._chunkcachesize
525 % self._chunkcachesize
526 )
526 )
527 elif self._chunkcachesize & (self._chunkcachesize - 1):
527 elif self._chunkcachesize & (self._chunkcachesize - 1):
528 raise error.RevlogError(
528 raise error.RevlogError(
529 _(b'revlog chunk cache size %r is not a power of 2')
529 _(b'revlog chunk cache size %r is not a power of 2')
530 % self._chunkcachesize
530 % self._chunkcachesize
531 )
531 )
532 force_nodemap = opts.get(b'devel-force-nodemap', False)
532 force_nodemap = opts.get(b'devel-force-nodemap', False)
533 return new_header, mmapindexthreshold, force_nodemap
533 return new_header, mmapindexthreshold, force_nodemap
534
534
535 def _get_data(self, filepath, mmap_threshold, size=None):
535 def _get_data(self, filepath, mmap_threshold, size=None):
536 """return a file content with or without mmap
536 """return a file content with or without mmap
537
537
538 If the file is missing return the empty string"""
538 If the file is missing return the empty string"""
539 try:
539 try:
540 with self.opener(filepath) as fp:
540 with self.opener(filepath) as fp:
541 if mmap_threshold is not None:
541 if mmap_threshold is not None:
542 file_size = self.opener.fstat(fp).st_size
542 file_size = self.opener.fstat(fp).st_size
543 if file_size >= mmap_threshold:
543 if file_size >= mmap_threshold:
544 if size is not None:
544 if size is not None:
545 # avoid potentiel mmap crash
545 # avoid potentiel mmap crash
546 size = min(file_size, size)
546 size = min(file_size, size)
547 # TODO: should .close() to release resources without
547 # TODO: should .close() to release resources without
548 # relying on Python GC
548 # relying on Python GC
549 if size is None:
549 if size is None:
550 return util.buffer(util.mmapread(fp))
550 return util.buffer(util.mmapread(fp))
551 else:
551 else:
552 return util.buffer(util.mmapread(fp, size))
552 return util.buffer(util.mmapread(fp, size))
553 if size is None:
553 if size is None:
554 return fp.read()
554 return fp.read()
555 else:
555 else:
556 return fp.read(size)
556 return fp.read(size)
557 except IOError as inst:
557 except IOError as inst:
558 if inst.errno != errno.ENOENT:
558 if inst.errno != errno.ENOENT:
559 raise
559 raise
560 return b''
560 return b''
561
561
562 def _loadindex(self):
562 def _loadindex(self):
563
563
564 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
564 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
565
565
566 if self.postfix is not None:
566 if self.postfix is not None:
567 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
567 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
568 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
568 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
569 entry_point = b'%s.i.a' % self.radix
569 entry_point = b'%s.i.a' % self.radix
570 else:
570 else:
571 entry_point = b'%s.i' % self.radix
571 entry_point = b'%s.i' % self.radix
572
572
573 entry_data = b''
573 entry_data = b''
574 self._initempty = True
574 self._initempty = True
575 entry_data = self._get_data(entry_point, mmapindexthreshold)
575 entry_data = self._get_data(entry_point, mmapindexthreshold)
576 if len(entry_data) > 0:
576 if len(entry_data) > 0:
577 header = INDEX_HEADER.unpack(entry_data[:4])[0]
577 header = INDEX_HEADER.unpack(entry_data[:4])[0]
578 self._initempty = False
578 self._initempty = False
579 else:
579 else:
580 header = new_header
580 header = new_header
581
581
582 self._format_flags = header & ~0xFFFF
582 self._format_flags = header & ~0xFFFF
583 self._format_version = header & 0xFFFF
583 self._format_version = header & 0xFFFF
584
584
585 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
585 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
586 if supported_flags is None:
586 if supported_flags is None:
587 msg = _(b'unknown version (%d) in revlog %s')
587 msg = _(b'unknown version (%d) in revlog %s')
588 msg %= (self._format_version, self.display_id)
588 msg %= (self._format_version, self.display_id)
589 raise error.RevlogError(msg)
589 raise error.RevlogError(msg)
590 elif self._format_flags & ~supported_flags:
590 elif self._format_flags & ~supported_flags:
591 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
591 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
592 display_flag = self._format_flags >> 16
592 display_flag = self._format_flags >> 16
593 msg %= (display_flag, self._format_version, self.display_id)
593 msg %= (display_flag, self._format_version, self.display_id)
594 raise error.RevlogError(msg)
594 raise error.RevlogError(msg)
595
595
596 features = FEATURES_BY_VERSION[self._format_version]
596 features = FEATURES_BY_VERSION[self._format_version]
597 self._inline = features[b'inline'](self._format_flags)
597 self._inline = features[b'inline'](self._format_flags)
598 self._generaldelta = features[b'generaldelta'](self._format_flags)
598 self._generaldelta = features[b'generaldelta'](self._format_flags)
599 self.hassidedata = features[b'sidedata']
599 self.hassidedata = features[b'sidedata']
600
600
601 if not features[b'docket']:
601 if not features[b'docket']:
602 self._indexfile = entry_point
602 self._indexfile = entry_point
603 index_data = entry_data
603 index_data = entry_data
604 else:
604 else:
605 self._docket_file = entry_point
605 self._docket_file = entry_point
606 if self._initempty:
606 if self._initempty:
607 self._docket = docketutil.default_docket(self, header)
607 self._docket = docketutil.default_docket(self, header)
608 else:
608 else:
609 self._docket = docketutil.parse_docket(
609 self._docket = docketutil.parse_docket(
610 self, entry_data, use_pending=self._trypending
610 self, entry_data, use_pending=self._trypending
611 )
611 )
612 self._indexfile = self._docket.index_filepath()
612 self._indexfile = self._docket.index_filepath()
613 index_data = b''
613 index_data = b''
614 index_size = self._docket.index_end
614 index_size = self._docket.index_end
615 if index_size > 0:
615 if index_size > 0:
616 index_data = self._get_data(
616 index_data = self._get_data(
617 self._indexfile, mmapindexthreshold, size=index_size
617 self._indexfile, mmapindexthreshold, size=index_size
618 )
618 )
619 if len(index_data) < index_size:
619 if len(index_data) < index_size:
620 msg = _(b'too few index data for %s: got %d, expected %d')
620 msg = _(b'too few index data for %s: got %d, expected %d')
621 msg %= (self.display_id, len(index_data), index_size)
621 msg %= (self.display_id, len(index_data), index_size)
622 raise error.RevlogError(msg)
622 raise error.RevlogError(msg)
623
623
624 self._inline = False
624 self._inline = False
625 # generaldelta implied by version 2 revlogs.
625 # generaldelta implied by version 2 revlogs.
626 self._generaldelta = True
626 self._generaldelta = True
627 # the logic for persistent nodemap will be dealt with within the
627 # the logic for persistent nodemap will be dealt with within the
628 # main docket, so disable it for now.
628 # main docket, so disable it for now.
629 self._nodemap_file = None
629 self._nodemap_file = None
630
630
631 if self.postfix is None:
631 if self.postfix is None:
632 self._datafile = b'%s.d' % self.radix
632 self._datafile = b'%s.d' % self.radix
633 else:
633 else:
634 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
634 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
635
635
636 self.nodeconstants = sha1nodeconstants
636 self.nodeconstants = sha1nodeconstants
637 self.nullid = self.nodeconstants.nullid
637 self.nullid = self.nodeconstants.nullid
638
638
639 # sparse-revlog can't be on without general-delta (issue6056)
639 # sparse-revlog can't be on without general-delta (issue6056)
640 if not self._generaldelta:
640 if not self._generaldelta:
641 self._sparserevlog = False
641 self._sparserevlog = False
642
642
643 self._storedeltachains = True
643 self._storedeltachains = True
644
644
645 devel_nodemap = (
645 devel_nodemap = (
646 self._nodemap_file
646 self._nodemap_file
647 and force_nodemap
647 and force_nodemap
648 and parse_index_v1_nodemap is not None
648 and parse_index_v1_nodemap is not None
649 )
649 )
650
650
651 use_rust_index = False
651 use_rust_index = False
652 if rustrevlog is not None:
652 if rustrevlog is not None:
653 if self._nodemap_file is not None:
653 if self._nodemap_file is not None:
654 use_rust_index = True
654 use_rust_index = True
655 else:
655 else:
656 use_rust_index = self.opener.options.get(b'rust.index')
656 use_rust_index = self.opener.options.get(b'rust.index')
657
657
658 self._parse_index = parse_index_v1
658 self._parse_index = parse_index_v1
659 if self._format_version == REVLOGV0:
659 if self._format_version == REVLOGV0:
660 self._parse_index = revlogv0.parse_index_v0
660 self._parse_index = revlogv0.parse_index_v0
661 elif self._format_version == REVLOGV2:
661 elif self._format_version == REVLOGV2:
662 self._parse_index = parse_index_v2
662 self._parse_index = parse_index_v2
663 elif self._format_version == CHANGELOGV2:
663 elif self._format_version == CHANGELOGV2:
664 self._parse_index = parse_index_cl_v2
664 self._parse_index = parse_index_cl_v2
665 elif devel_nodemap:
665 elif devel_nodemap:
666 self._parse_index = parse_index_v1_nodemap
666 self._parse_index = parse_index_v1_nodemap
667 elif use_rust_index:
667 elif use_rust_index:
668 self._parse_index = parse_index_v1_mixed
668 self._parse_index = parse_index_v1_mixed
669 try:
669 try:
670 d = self._parse_index(index_data, self._inline)
670 d = self._parse_index(index_data, self._inline)
671 index, _chunkcache = d
671 index, _chunkcache = d
672 use_nodemap = (
672 use_nodemap = (
673 not self._inline
673 not self._inline
674 and self._nodemap_file is not None
674 and self._nodemap_file is not None
675 and util.safehasattr(index, 'update_nodemap_data')
675 and util.safehasattr(index, 'update_nodemap_data')
676 )
676 )
677 if use_nodemap:
677 if use_nodemap:
678 nodemap_data = nodemaputil.persisted_data(self)
678 nodemap_data = nodemaputil.persisted_data(self)
679 if nodemap_data is not None:
679 if nodemap_data is not None:
680 docket = nodemap_data[0]
680 docket = nodemap_data[0]
681 if (
681 if (
682 len(d[0]) > docket.tip_rev
682 len(d[0]) > docket.tip_rev
683 and d[0][docket.tip_rev][7] == docket.tip_node
683 and d[0][docket.tip_rev][7] == docket.tip_node
684 ):
684 ):
685 # no changelog tampering
685 # no changelog tampering
686 self._nodemap_docket = docket
686 self._nodemap_docket = docket
687 index.update_nodemap_data(*nodemap_data)
687 index.update_nodemap_data(*nodemap_data)
688 except (ValueError, IndexError):
688 except (ValueError, IndexError):
689 raise error.RevlogError(
689 raise error.RevlogError(
690 _(b"index %s is corrupted") % self.display_id
690 _(b"index %s is corrupted") % self.display_id
691 )
691 )
692 self.index, self._chunkcache = d
692 self.index, self._chunkcache = d
693 if not self._chunkcache:
693 if not self._chunkcache:
694 self._chunkclear()
694 self._chunkclear()
695 # revnum -> (chain-length, sum-delta-length)
695 # revnum -> (chain-length, sum-delta-length)
696 self._chaininfocache = util.lrucachedict(500)
696 self._chaininfocache = util.lrucachedict(500)
697 # revlog header -> revlog compressor
697 # revlog header -> revlog compressor
698 self._decompressors = {}
698 self._decompressors = {}
699
699
700 @util.propertycache
700 @util.propertycache
701 def revlog_kind(self):
701 def revlog_kind(self):
702 return self.target[0]
702 return self.target[0]
703
703
704 @util.propertycache
704 @util.propertycache
705 def display_id(self):
705 def display_id(self):
706 """The public facing "ID" of the revlog that we use in message"""
706 """The public facing "ID" of the revlog that we use in message"""
707 # Maybe we should build a user facing representation of
707 # Maybe we should build a user facing representation of
708 # revlog.target instead of using `self.radix`
708 # revlog.target instead of using `self.radix`
709 return self.radix
709 return self.radix
710
710
711 def _get_decompressor(self, t):
711 def _get_decompressor(self, t):
712 try:
712 try:
713 compressor = self._decompressors[t]
713 compressor = self._decompressors[t]
714 except KeyError:
714 except KeyError:
715 try:
715 try:
716 engine = util.compengines.forrevlogheader(t)
716 engine = util.compengines.forrevlogheader(t)
717 compressor = engine.revlogcompressor(self._compengineopts)
717 compressor = engine.revlogcompressor(self._compengineopts)
718 self._decompressors[t] = compressor
718 self._decompressors[t] = compressor
719 except KeyError:
719 except KeyError:
720 raise error.RevlogError(
720 raise error.RevlogError(
721 _(b'unknown compression type %s') % binascii.hexlify(t)
721 _(b'unknown compression type %s') % binascii.hexlify(t)
722 )
722 )
723 return compressor
723 return compressor
724
724
725 @util.propertycache
725 @util.propertycache
726 def _compressor(self):
726 def _compressor(self):
727 engine = util.compengines[self._compengine]
727 engine = util.compengines[self._compengine]
728 return engine.revlogcompressor(self._compengineopts)
728 return engine.revlogcompressor(self._compengineopts)
729
729
730 @util.propertycache
730 @util.propertycache
731 def _decompressor(self):
731 def _decompressor(self):
732 """the default decompressor"""
732 """the default decompressor"""
733 if self._docket is None:
733 if self._docket is None:
734 return None
734 return None
735 t = self._docket.default_compression_header
735 t = self._docket.default_compression_header
736 c = self._get_decompressor(t)
736 c = self._get_decompressor(t)
737 return c.decompress
737 return c.decompress
738
738
739 def _indexfp(self):
739 def _indexfp(self):
740 """file object for the revlog's index file"""
740 """file object for the revlog's index file"""
741 return self.opener(self._indexfile, mode=b"r")
741 return self.opener(self._indexfile, mode=b"r")
742
742
743 def __index_write_fp(self):
743 def __index_write_fp(self):
744 # You should not use this directly and use `_writing` instead
744 # You should not use this directly and use `_writing` instead
745 try:
745 try:
746 f = self.opener(
746 f = self.opener(
747 self._indexfile, mode=b"r+", checkambig=self._checkambig
747 self._indexfile, mode=b"r+", checkambig=self._checkambig
748 )
748 )
749 if self._docket is None:
749 if self._docket is None:
750 f.seek(0, os.SEEK_END)
750 f.seek(0, os.SEEK_END)
751 else:
751 else:
752 f.seek(self._docket.index_end, os.SEEK_SET)
752 f.seek(self._docket.index_end, os.SEEK_SET)
753 return f
753 return f
754 except IOError as inst:
754 except IOError as inst:
755 if inst.errno != errno.ENOENT:
755 if inst.errno != errno.ENOENT:
756 raise
756 raise
757 return self.opener(
757 return self.opener(
758 self._indexfile, mode=b"w+", checkambig=self._checkambig
758 self._indexfile, mode=b"w+", checkambig=self._checkambig
759 )
759 )
760
760
761 def __index_new_fp(self):
761 def __index_new_fp(self):
762 # You should not use this unless you are upgrading from inline revlog
762 # You should not use this unless you are upgrading from inline revlog
763 return self.opener(
763 return self.opener(
764 self._indexfile,
764 self._indexfile,
765 mode=b"w",
765 mode=b"w",
766 checkambig=self._checkambig,
766 checkambig=self._checkambig,
767 atomictemp=True,
767 atomictemp=True,
768 )
768 )
769
769
770 def _datafp(self, mode=b'r'):
770 def _datafp(self, mode=b'r'):
771 """file object for the revlog's data file"""
771 """file object for the revlog's data file"""
772 return self.opener(self._datafile, mode=mode)
772 return self.opener(self._datafile, mode=mode)
773
773
774 @contextlib.contextmanager
774 @contextlib.contextmanager
775 def _datareadfp(self, existingfp=None):
775 def _datareadfp(self, existingfp=None):
776 """file object suitable to read data"""
776 """file object suitable to read data"""
777 # Use explicit file handle, if given.
777 # Use explicit file handle, if given.
778 if existingfp is not None:
778 if existingfp is not None:
779 yield existingfp
779 yield existingfp
780
780
781 # Use a file handle being actively used for writes, if available.
781 # Use a file handle being actively used for writes, if available.
782 # There is some danger to doing this because reads will seek the
782 # There is some danger to doing this because reads will seek the
783 # file. However, _writeentry() performs a SEEK_END before all writes,
783 # file. However, _writeentry() performs a SEEK_END before all writes,
784 # so we should be safe.
784 # so we should be safe.
785 elif self._writinghandles:
785 elif self._writinghandles:
786 if self._inline:
786 if self._inline:
787 yield self._writinghandles[0]
787 yield self._writinghandles[0]
788 else:
788 else:
789 yield self._writinghandles[1]
789 yield self._writinghandles[1]
790
790
791 # Otherwise open a new file handle.
791 # Otherwise open a new file handle.
792 else:
792 else:
793 if self._inline:
793 if self._inline:
794 func = self._indexfp
794 func = self._indexfp
795 else:
795 else:
796 func = self._datafp
796 func = self._datafp
797 with func() as fp:
797 with func() as fp:
798 yield fp
798 yield fp
799
799
800 def tiprev(self):
800 def tiprev(self):
801 return len(self.index) - 1
801 return len(self.index) - 1
802
802
803 def tip(self):
803 def tip(self):
804 return self.node(self.tiprev())
804 return self.node(self.tiprev())
805
805
806 def __contains__(self, rev):
806 def __contains__(self, rev):
807 return 0 <= rev < len(self)
807 return 0 <= rev < len(self)
808
808
809 def __len__(self):
809 def __len__(self):
810 return len(self.index)
810 return len(self.index)
811
811
812 def __iter__(self):
812 def __iter__(self):
813 return iter(pycompat.xrange(len(self)))
813 return iter(pycompat.xrange(len(self)))
814
814
815 def revs(self, start=0, stop=None):
815 def revs(self, start=0, stop=None):
816 """iterate over all rev in this revlog (from start to stop)"""
816 """iterate over all rev in this revlog (from start to stop)"""
817 return storageutil.iterrevs(len(self), start=start, stop=stop)
817 return storageutil.iterrevs(len(self), start=start, stop=stop)
818
818
819 @property
819 @property
820 def nodemap(self):
820 def nodemap(self):
821 msg = (
821 msg = (
822 b"revlog.nodemap is deprecated, "
822 b"revlog.nodemap is deprecated, "
823 b"use revlog.index.[has_node|rev|get_rev]"
823 b"use revlog.index.[has_node|rev|get_rev]"
824 )
824 )
825 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
825 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
826 return self.index.nodemap
826 return self.index.nodemap
827
827
828 @property
828 @property
829 def _nodecache(self):
829 def _nodecache(self):
830 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
830 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
831 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
831 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
832 return self.index.nodemap
832 return self.index.nodemap
833
833
834 def hasnode(self, node):
834 def hasnode(self, node):
835 try:
835 try:
836 self.rev(node)
836 self.rev(node)
837 return True
837 return True
838 except KeyError:
838 except KeyError:
839 return False
839 return False
840
840
841 def candelta(self, baserev, rev):
841 def candelta(self, baserev, rev):
842 """whether two revisions (baserev, rev) can be delta-ed or not"""
842 """whether two revisions (baserev, rev) can be delta-ed or not"""
843 # Disable delta if either rev requires a content-changing flag
843 # Disable delta if either rev requires a content-changing flag
844 # processor (ex. LFS). This is because such flag processor can alter
844 # processor (ex. LFS). This is because such flag processor can alter
845 # the rawtext content that the delta will be based on, and two clients
845 # the rawtext content that the delta will be based on, and two clients
846 # could have a same revlog node with different flags (i.e. different
846 # could have a same revlog node with different flags (i.e. different
847 # rawtext contents) and the delta could be incompatible.
847 # rawtext contents) and the delta could be incompatible.
848 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
848 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
849 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
849 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
850 ):
850 ):
851 return False
851 return False
852 return True
852 return True
853
853
854 def update_caches(self, transaction):
854 def update_caches(self, transaction):
855 if self._nodemap_file is not None:
855 if self._nodemap_file is not None:
856 if transaction is None:
856 if transaction is None:
857 nodemaputil.update_persistent_nodemap(self)
857 nodemaputil.update_persistent_nodemap(self)
858 else:
858 else:
859 nodemaputil.setup_persistent_nodemap(transaction, self)
859 nodemaputil.setup_persistent_nodemap(transaction, self)
860
860
861 def clearcaches(self):
861 def clearcaches(self):
862 self._revisioncache = None
862 self._revisioncache = None
863 self._chainbasecache.clear()
863 self._chainbasecache.clear()
864 self._chunkcache = (0, b'')
864 self._chunkcache = (0, b'')
865 self._pcache = {}
865 self._pcache = {}
866 self._nodemap_docket = None
866 self._nodemap_docket = None
867 self.index.clearcaches()
867 self.index.clearcaches()
868 # The python code is the one responsible for validating the docket, we
868 # The python code is the one responsible for validating the docket, we
869 # end up having to refresh it here.
869 # end up having to refresh it here.
870 use_nodemap = (
870 use_nodemap = (
871 not self._inline
871 not self._inline
872 and self._nodemap_file is not None
872 and self._nodemap_file is not None
873 and util.safehasattr(self.index, 'update_nodemap_data')
873 and util.safehasattr(self.index, 'update_nodemap_data')
874 )
874 )
875 if use_nodemap:
875 if use_nodemap:
876 nodemap_data = nodemaputil.persisted_data(self)
876 nodemap_data = nodemaputil.persisted_data(self)
877 if nodemap_data is not None:
877 if nodemap_data is not None:
878 self._nodemap_docket = nodemap_data[0]
878 self._nodemap_docket = nodemap_data[0]
879 self.index.update_nodemap_data(*nodemap_data)
879 self.index.update_nodemap_data(*nodemap_data)
880
880
881 def rev(self, node):
881 def rev(self, node):
882 try:
882 try:
883 return self.index.rev(node)
883 return self.index.rev(node)
884 except TypeError:
884 except TypeError:
885 raise
885 raise
886 except error.RevlogError:
886 except error.RevlogError:
887 # parsers.c radix tree lookup failed
887 # parsers.c radix tree lookup failed
888 if (
888 if (
889 node == self.nodeconstants.wdirid
889 node == self.nodeconstants.wdirid
890 or node in self.nodeconstants.wdirfilenodeids
890 or node in self.nodeconstants.wdirfilenodeids
891 ):
891 ):
892 raise error.WdirUnsupported
892 raise error.WdirUnsupported
893 raise error.LookupError(node, self.display_id, _(b'no node'))
893 raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

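    # Illustrative note (not part of the upstream source): for a
    # hypothetical entry whose data starts at byte offset 4096 and which
    # carries flag 0x1, the packed first field holds (4096 << 16) | 0x1,
    # so start() recovers 4096 and flags() recovers 0x1.
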
    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

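    # Illustrative note (not part of the upstream source): chainbase()
    # follows the delta-base field (index entry slot 3) until it reaches a
    # self-referencing entry. E.g. if index[5][3] == 4, index[4][3] == 3 and
    # index[3][3] == 3, then chainbase(5) is 3: revision 3 stores a full
    # snapshot rather than a delta.
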
    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise
        if entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

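    # Illustrative note (not part of the upstream source): the swap in
    # parentrevs() keeps a null parent in the second slot, so an entry
    # stored with parents (nullrev, 7) is reported as (7, nullrev).
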
    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if d[5] == self.nullid:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

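    # Illustrative note (not part of the upstream source): _chaininfo()
    # returns (chain length, total compressed size of the chain). For a
    # hypothetical chain 3 <- 4 <- 5 where revs 4 and 5 each store a
    # 100-byte delta and the full text at rev 3 is 1000 bytes,
    # _chaininfo(5) would return (2, 1200).
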
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

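    # Illustrative sketch (not part of the upstream source), reusing the
    # hypothetical chain 3 <- 4 <- 5 from the notes above:
    #
    #   _deltachain(5)            -> ([3, 4, 5], False)
    #   _deltachain(5, stoprev=4) -> ([5], True)
    #
    # The second call stops before including rev 4, so ``stopped`` is True.
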
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

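    # Illustrative note (not part of the upstream source): in a linear
    # history 0-1-2-3, findcommonmissing(common=[node(1)], heads=[node(3)])
    # builds ``has`` as the inclusive ancestor set {nullrev, 0, 1} and
    # returns the nodes of revisions [2, 3] as the missing list.
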
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

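    # Illustrative note (not part of the upstream source): for a linear
    # history 0-1-2 the loop above leaves ishead == [0, 0, 1, 0]. The
    # extra (count + 1)-th slot exists so that writes to ishead[nullrev],
    # i.e. ishead[-1], land in a scratch cell instead of clobbering a real
    # revision; only rev 2 survives as a head.
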
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

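    # Illustrative note (not part of the upstream source): isancestorrev()
    # above reduces ancestry to reachability. E.g. isancestorrev(2, 5) asks
    # reachableroots(2, [5], [2]) whether a walk from rev 5 back through
    # parent links can reach rev 2, with minroot=2 bounding how far back
    # the walk needs to look.
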
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.display_id, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

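    # Illustrative note (not part of the upstream source): lookup() tries
    # the exact forms handled by _match() first (integer rev, binary
    # nodeid, str(rev), full hex nodeid) and only then falls back to
    # _partialmatch() for hex prefixes. So in a repository with at least
    # 26 revisions, lookup(b'25') resolves to revision 25 rather than to a
    # node whose hex form happens to start with "25".
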
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self._indexfile if self._inline else self._datafile,
                        length,
                        offset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self._indexfile if self._inline else self._datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

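    # Illustrative note (not part of the upstream source): the window math
    # above rounds the start down and the end up to cache-size boundaries.
    # With a hypothetical cachesize of 65536, a request for 1000 bytes at
    # offset 65000 yields realoffset 0 and reallength 131072, i.e. both
    # aligned blocks touched by the requested range are read and cached.
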
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._getsegment(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = 'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = 'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

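    # For example, _chunks([5, 6, 7]) typically issues a single segment read
    # covering revs 5..7 and slices each revision's chunk out of that buffer,
    # where three separate _chunk() calls would each perform their own read.
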
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

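    # Example for ``deltaparent`` above: in a non-generaldelta revlog rev 7
    # always deltas against rev 6, while with generaldelta the base recorded
    # in the index entry is used directly; base == rev marks a full snapshot.
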
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.hassidedata:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (these usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

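    # Reconstruction sketch for ``_rawtext`` above: for a delta chain
    # [base, r1, r2, rev] returned by _deltachain, the raw text is the base
    # snapshot patched with each delta in order, i.e. effectively
    # mdiff.patches(rawtext(base), [delta(r1), delta(r2), delta(rev)]),
    # which is what the basetext/bins handling computes.
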
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        comp_segment = self._getsegment(sidedata_offset, sidedata_size)
        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = 'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
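                    # troffset is an offset into the inline .i file, where
                    # index entries are interleaved with the data: rev r's own
                    # entry starts at start(r) + r * entry_size (its data
                    # follows one entry later), so the comparison has to be
                    # made in those physical coordinates rather than against
                    # the bare data offset.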
                    if troffset <= self.start(r) + r * self.index.entry_size:
                        trindex = r
                new_dfh.flush()

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()
                # the temp file replaces the real index when we exit the
                # context manager

            tr.replace(self._indexfile, trindex * self.index.entry_size)
            nodemaputil.setup_persistent_nodemap(tr, self)
            self._chunkclear()

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh)
                new_dfh = None
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            r = len(self)
            dsize = 0
            if r:
                dsize = self.end(r - 1)
            dfh = None
            if not self._inline:
                try:
                    dfh = self._datafp(b"r+")
                    if self._docket is None:
                        dfh.seek(0, os.SEEK_END)
                    else:
                        dfh.seek(self._docket.data_end, os.SEEK_SET)
                except IOError as inst:
                    if inst.errno != errno.ENOENT:
                        raise
                    dfh = self._datafp(b"w+")
                transaction.add(self._datafile, dsize)
            try:
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                try:
                    self._writinghandles = (ifh, dfh)
                    try:
                        yield
                        if self._docket is not None:
                            self._write_docket(transaction)
                    finally:
                        self._writinghandles = None
                finally:
                    ifh.close()
            finally:
                if dfh is not None:
                    dfh.close()

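    # Minimal usage sketch for ``_writing`` above (this is how
    # ``addrawrevision`` and ``addgroup`` below drive it); all writes happen
    # inside the context so file handles and transaction bookkeeping stay
    # consistent:
    #
    #   with self._writing(transaction):
    #       self._addrevision(...)
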
    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic.

        We could also imagine using the same transaction logic for all
        revlogs since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that doesn't support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

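    # Illustration of the (header, data) contract of ``compress`` above,
    # assuming the default zlib engine, which declines to compress short or
    # incompressible input:
    #
    #   rl.compress(b'x' * 4096)  # -> (b'', <zlib stream>)
    #   rl.compress(b'\0abc')     # -> (b'', b'\0abc')  raw, self-marking
    #   rl.compress(b'abc')       # -> (b'u', b'abc')   raw, needs a marker
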
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

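    # The header dispatch in ``decompress`` above undoes ``compress``:
    #
    #   decompress(b'u' + payload)  # -> buffer over payload, marker stripped
    #   decompress(b'\0' + rest)    # -> returned as-is (raw chunks keep NUL)
    #   decompress(b'x' + ...)      # -> zlib-inflated bytes
    #   any other leading byte      # -> routed via _get_decompressor(t)
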
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh = self._writinghandles
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            h, d = deltainfo.data
            if not h and not d:
                # no data to store at all... declare them uncompressed
                compression_mode = COMP_MODE_PLAIN
            elif not h:
                t = d[0:1]
                if t == b'\0':
                    compression_mode = COMP_MODE_PLAIN
                elif t == self._docket.default_compression_header:
                    compression_mode = COMP_MODE_DEFAULT
            elif h == b'u':
                # we have a more efficient way to declare uncompressed
                h = b''
                compression_mode = COMP_MODE_PLAIN
                deltainfo = deltautil.drop_u_compression(deltainfo)

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
            compression_mode,
            sidedata_compression_mode,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(self, transaction, entry, data, link, offset, sidedata):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
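            # Inline: the transaction offset must be expressed in physical .i
            # coordinates, i.e. the data offset plus the index entries already
            # interleaved before it (the same accounting the inline ->
            # non-inline split in _enforceinlinesize relies on).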
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = self._writinghandles[0].tell()
            self._docket.data_end = self._writinghandles[1].tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

2684 def addgroup(
2684 def addgroup(
2685 self,
2685 self,
2686 deltas,
2686 deltas,
2687 linkmapper,
2687 linkmapper,
2688 transaction,
2688 transaction,
2689 alwayscache=False,
2689 alwayscache=False,
2690 addrevisioncb=None,
2690 addrevisioncb=None,
2691 duplicaterevisioncb=None,
2691 duplicaterevisioncb=None,
2692 ):
2692 ):
2693 """
2693 """
2694 add a delta group
2694 add a delta group
2695
2695
2696 given a set of deltas, add them to the revision log. the
2696 given a set of deltas, add them to the revision log. the
2697 first delta is against its parent, which should be in our
2697 first delta is against its parent, which should be in our
2698 log, the rest are against the previous delta.
2698 log, the rest are against the previous delta.
2699
2699
2700 If ``addrevisioncb`` is defined, it will be called with arguments of
2700 If ``addrevisioncb`` is defined, it will be called with arguments of
2701 this revlog and the node that was added.
2701 this revlog and the node that was added.
2702 """
2702 """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
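            # In the inline case, index entries and revision data are
            # interleaved in the same file, so the truncation point has to
            # account for both the data before ``rev`` and the ``rev``
            # index entries that precede it.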
            end = data_end + (rev * self.index.entry_size)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment.
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        Return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self._indexfile]
        if not self._inline:
            res.append(self._datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether deltas are forcibly computed against both
        parents for merges. When it is unset, the destination revlog's
        existing setting is kept.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text, sidedata = self._revisiondata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext, sidedata = self._revisiondata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.
        #
        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(
            self.opener,
            target=self.target,
            radix=self.radix,
            postfix=b'tmpcensored',
            censorable=True,
        )
        newrl._format_version = self._format_version
        newrl._format_flags = self._format_flags
        newrl._generaldelta = self._generaldelta
        newrl._parse_index = self._parse_index

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self._indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self._datafile, location=b'store')

        self.opener.rename(newrl._indexfile, self._indexfile)
        if not self._inline:
            self.opener.rename(newrl._datafile, self._datafile)

        self.clearcaches()
        self._loadindex()

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).
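            # The loop body below performs check 1 directly (comparing l1
            # and l2) and delegates check 2 to ``_verify_revision``.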

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh = self._writinghandles
            if self._docket is not None:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
            else:
                dfh.seek(0, os.SEEK_END)

            current_offset = dfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
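                # Note (a summary, inferred from how the constants are used
                # here): COMP_MODE_PLAIN stores the bytes uncompressed,
                # COMP_MODE_DEFAULT marks them as compressed with the
                # docket's default engine, and COMP_MODE_INLINE keeps a
                # per-chunk compression header inline with the data.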
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                dfh.seek(current_offset, os.SEEK_SET)
                dfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            if self._docket is not None:
                self._docket.data_end = dfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)