revlog: update data file record before index rename...
Joerg Sonnenberger
r48060:d92310d4 default draft
@@ -1,3454 +1,3461 b''
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    revlogv0,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)


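# Illustrative sketch (not part of the original file): offset_type() packs a
# byte offset and a 16-bit flag field into a single integer; the index
# accessors further down unpack it again:
#
#   packed = offset_type(4096, REVIDX_ISCENSORED)
#   offset = packed >> 16    # -> 4096
#   flags = packed & 0xFFFF  # -> REVIDX_ISCENSORED

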
def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    assert not inline
    from .pure.parsers import parse_index_cl_v2

    index, cache = parse_index_cl_v2(data)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).


    Internal details
    ----------------

    A large part of the revlog logic deals with revisions' "index entries", tuple
    objects that contain the same "items" whatever the revlog version.
    Different versions will have different ways of storing these items (sometimes
    not having them at all), but the tuple will always be the same. New fields
    are usually added at the end to avoid breaking existing code that relies
    on the existing order. The fields are defined as follows:

    [0] offset:
        The byte index of the start of revision data chunk.
        That value is shifted up by 16 bits. Use "offset = field >> 16" to
        retrieve it.

      flags:
        A flag field that carries special information or changes the behavior
        of the revision. (see `REVIDX_*` constants for details)
        The flag field only occupies the first 16 bits of this field,
        use "flags = field & 0xFFFF" to retrieve the value.

    [1] compressed length:
        The size, in bytes, of the chunk on disk

    [2] uncompressed length:
        The size, in bytes, of the full revision once reconstructed.

    [3] base rev:
        Either the base of the revision delta chain (without general
        delta), or the base of the delta (stored in the data chunk)
        with general delta.

    [4] link rev:
        Changelog revision number of the changeset introducing this
        revision.

    [5] parent 1 rev:
        Revision number of the first parent

    [6] parent 2 rev:
        Revision number of the second parent

    [7] node id:
        The node id of the current revision

    [8] sidedata offset:
        The byte index of the start of the revision's side-data chunk.

    [9] sidedata chunk length:
        The size, in bytes, of the revision's side-data chunk.

    [10] data compression mode:
        two bits that detail the way the data chunk is compressed on disk.
        (see "COMP_MODE_*" constants for details). For revlog version 0 and
        1 this will always be COMP_MODE_INLINE.

    [11] side-data compression mode:
        two bits that detail the way the sidedata chunk is compressed on disk.
        (see "COMP_MODE_*" constants for details)
    """

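    # Illustrative sketch (not part of the original file): unpacking the
    # index-entry fields documented above, assuming the tuple layout holds:
    #
    #   entry = self.index[rev]
    #   offset = entry[0] >> 16      # [0] high bits: data chunk offset
    #   flags = entry[0] & 0xFFFF    # [0] low 16 bits: REVIDX_* flags
    #   comp_len, raw_len = entry[1], entry[2]
    #   base_rev, link_rev = entry[3], entry[4]
    #   p1_rev, p2_rev, node = entry[5], entry[6], entry[7]
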
    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must reliably be set by normal
        code, but that test, debug, or performance measurement code might
        not set this to an accurate value.
392 """
392 """
393 self.upperboundcomp = upperboundcomp
393 self.upperboundcomp = upperboundcomp
394
394
395 self.radix = radix
395 self.radix = radix
396
396
397 self._docket_file = None
397 self._docket_file = None
398 self._indexfile = None
398 self._indexfile = None
399 self._datafile = None
399 self._datafile = None
400 self._nodemap_file = None
400 self._nodemap_file = None
401 self.postfix = postfix
401 self.postfix = postfix
402 self._trypending = trypending
402 self._trypending = trypending
403 self.opener = opener
403 self.opener = opener
404 if persistentnodemap:
404 if persistentnodemap:
405 self._nodemap_file = nodemaputil.get_nodemap_file(self)
405 self._nodemap_file = nodemaputil.get_nodemap_file(self)
406
406
407 assert target[0] in ALL_KINDS
407 assert target[0] in ALL_KINDS
408 assert len(target) == 2
408 assert len(target) == 2
409 self.target = target
409 self.target = target
410 # When True, indexfile is opened with checkambig=True at writing, to
410 # When True, indexfile is opened with checkambig=True at writing, to
411 # avoid file stat ambiguity.
411 # avoid file stat ambiguity.
412 self._checkambig = checkambig
412 self._checkambig = checkambig
413 self._mmaplargeindex = mmaplargeindex
413 self._mmaplargeindex = mmaplargeindex
414 self._censorable = censorable
414 self._censorable = censorable
415 # 3-tuple of (node, rev, text) for a raw revision.
415 # 3-tuple of (node, rev, text) for a raw revision.
416 self._revisioncache = None
416 self._revisioncache = None
417 # Maps rev to chain base rev.
417 # Maps rev to chain base rev.
418 self._chainbasecache = util.lrucachedict(100)
418 self._chainbasecache = util.lrucachedict(100)
419 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
419 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
420 self._chunkcache = (0, b'')
420 self._chunkcache = (0, b'')
421 # How much data to read and cache into the raw revlog data cache.
421 # How much data to read and cache into the raw revlog data cache.
422 self._chunkcachesize = 65536
422 self._chunkcachesize = 65536
423 self._maxchainlen = None
423 self._maxchainlen = None
424 self._deltabothparents = True
424 self._deltabothparents = True
425 self.index = None
425 self.index = None
426 self._docket = None
426 self._docket = None
427 self._nodemap_docket = None
427 self._nodemap_docket = None
428 # Mapping of partial identifiers to full nodes.
428 # Mapping of partial identifiers to full nodes.
429 self._pcache = {}
429 self._pcache = {}
430 # Mapping of revision integer to full node.
430 # Mapping of revision integer to full node.
431 self._compengine = b'zlib'
431 self._compengine = b'zlib'
432 self._compengineopts = {}
432 self._compengineopts = {}
433 self._maxdeltachainspan = -1
433 self._maxdeltachainspan = -1
434 self._withsparseread = False
434 self._withsparseread = False
435 self._sparserevlog = False
435 self._sparserevlog = False
436 self.hassidedata = False
436 self.hassidedata = False
437 self._srdensitythreshold = 0.50
437 self._srdensitythreshold = 0.50
438 self._srmingapsize = 262144
438 self._srmingapsize = 262144
439
439
440 # Make copy of flag processors so each revlog instance can support
440 # Make copy of flag processors so each revlog instance can support
441 # custom flags.
441 # custom flags.
442 self._flagprocessors = dict(flagutil.flagprocessors)
442 self._flagprocessors = dict(flagutil.flagprocessors)
443
443
444 # 2-tuple of file handles being used for active writing.
444 # 2-tuple of file handles being used for active writing.
445 self._writinghandles = None
445 self._writinghandles = None
446 # prevent nesting of addgroup
446 # prevent nesting of addgroup
447 self._adding_group = None
447 self._adding_group = None
448
448
449 self._loadindex()
449 self._loadindex()
450
450
451 self._concurrencychecker = concurrencychecker
451 self._concurrencychecker = concurrencychecker
452
452
    def _init_opts(self):
        """process options (from above/config) to set up the associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

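    # Illustrative note (not from the original source): the chunk cache size
    # check in _init_opts() relies on the classic power-of-two bit trick,
    # valid for any positive integer n:
    #
    #   n & (n - 1) == 0    # True only when n is a power of 2
    #   65536 & 65535 == 0  # the default chunk cache size passes
    #   65537 & 65536 != 0  # a non-power-of-two fails
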
    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing, return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''

    def _loadindex(self):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        entry_data = b''
        self._initempty = True
        entry_data = self._get_data(entry_point, mmapindexthreshold)
        if len(entry_data) > 0:
            header = INDEX_HEADER.unpack(entry_data[:4])[0]
            self._initempty = False
        else:
            header = new_header

        self._format_flags = header & ~0xFFFF
        self._format_version = header & 0xFFFF

        supported_flags = SUPPORTED_FLAGS.get(self._format_version)
        if supported_flags is None:
            msg = _(b'unknown version (%d) in revlog %s')
            msg %= (self._format_version, self.display_id)
            raise error.RevlogError(msg)
        elif self._format_flags & ~supported_flags:
            msg = _(b'unknown flags (%#04x) in version %d revlog %s')
            display_flag = self._format_flags >> 16
            msg %= (display_flag, self._format_version, self.display_id)
            raise error.RevlogError(msg)

        features = FEATURES_BY_VERSION[self._format_version]
        self._inline = features[b'inline'](self._format_flags)
        self._generaldelta = features[b'generaldelta'](self._format_flags)
        self.hassidedata = features[b'sidedata']

        if not features[b'docket']:
            self._indexfile = entry_point
            index_data = entry_data
        else:
            self._docket_file = entry_point
            if self._initempty:
                self._docket = docketutil.default_docket(self, header)
            else:
                self._docket = docketutil.parse_docket(
                    self, entry_data, use_pending=self._trypending
                )
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        # Maybe we should build a user facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly; use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket,
        # so we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

947 def linkrev(self, rev):
947 def linkrev(self, rev):
948 return self.index[rev][4]
948 return self.index[rev][4]
949
949
950 def parentrevs(self, rev):
950 def parentrevs(self, rev):
951 try:
951 try:
952 entry = self.index[rev]
952 entry = self.index[rev]
953 except IndexError:
953 except IndexError:
954 if rev == wdirrev:
954 if rev == wdirrev:
955 raise error.WdirUnsupported
955 raise error.WdirUnsupported
956 raise
956 raise
957 if entry[5] == nullrev:
957 if entry[5] == nullrev:
958 return entry[6], entry[5]
958 return entry[6], entry[5]
959 else:
959 else:
960 return entry[5], entry[6]
960 return entry[5], entry[6]
961
961
962 # fast parentrevs(rev) where rev isn't filtered
962 # fast parentrevs(rev) where rev isn't filtered
963 _uncheckedparentrevs = parentrevs
963 _uncheckedparentrevs = parentrevs
964
964
965 def node(self, rev):
965 def node(self, rev):
966 try:
966 try:
967 return self.index[rev][7]
967 return self.index[rev][7]
968 except IndexError:
968 except IndexError:
969 if rev == wdirrev:
969 if rev == wdirrev:
970 raise error.WdirUnsupported
970 raise error.WdirUnsupported
971 raise
971 raise
972
972
973 # Derived from index values.
973 # Derived from index values.
974
974
975 def end(self, rev):
975 def end(self, rev):
976 return self.start(rev) + self.length(rev)
976 return self.start(rev) + self.length(rev)
977
977
978 def parents(self, node):
978 def parents(self, node):
979 i = self.index
979 i = self.index
980 d = i[self.rev(node)]
980 d = i[self.rev(node)]
981 # inline node() to avoid function call overhead
981 # inline node() to avoid function call overhead
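        # Like parentrevs() above, canonicalize a null first parent: when p1
        # is null and p2 is not, swap them so the null parent comes second.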
        if d[5] == nullrev:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
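
    # Example (illustrative only): with generaldelta, if rev 7 stores a
    # delta against rev 5, rev 5 against rev 2, and rev 2 is a full
    # snapshot (its delta base is itself), then:
    #
    #   _deltachain(7)            == ([2, 5, 7], False)
    #   _deltachain(7, stoprev=5) == ([7], True)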

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]
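
    # Example (illustrative only): in a linear history with revs 0..4,
    # findcommonmissing([node(2)], [node(4)]) yields a 'has' set containing
    # {nullrev, 0, 1, 2} and the missing list [node(3), node(4)], where
    # node() maps each rev to its node ID as above.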

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
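        # The extra slot is for nullrev: it is -1, so null parents below
        # index the final slot instead of clobbering a real revision.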
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))
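
    # Revision numbers are assigned in topological order (a parent is always
    # appended before its children), which is why ``a > b`` above can rule
    # out ``a`` being an ancestor of ``b`` without walking the graph.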

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids here, as they should always be
        # full hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.display_id, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass
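
    # Example (illustrative only): for id = b'a1b2c' the slow path above
    # scans the index with the even-length prefix bin(b'a1b2') (bin() only
    # converts whole bytes) and then keeps only nodes whose full hex form
    # starts with b'a1b2c'.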

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node
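
    # The node hash covers the parents as well as the text, which is what
    # makes the single comparison above sufficient. A minimal sketch of the
    # scheme implemented by storageutil.hashrevisionsha1 (illustrative only):
    #
    #   import hashlib
    #
    #   def sketch_hashrevisionsha1(text, p1, p2):
    #       s = hashlib.sha1(min(p1, p2))
    #       s.update(max(p1, p2))
    #       s.update(text)
    #       return s.digest()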

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self._indexfile if self._inline else self._datafile,
                        length,
                        offset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self._indexfile if self._inline else self._datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d
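
    # Example (illustrative only): with a 64 KiB window (cachesize 65536),
    # _readsegment(offset=70000, length=100) rounds down to
    # realoffset = 70000 & ~65535 = 65536 and reads
    # ((70000 + 100 + 65536) & ~65535) - 65536 = 65536 bytes, so nearby
    # requests in either direction are served from the cached window.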

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._getsegment(start, length, df=df)
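
    # Example (illustrative only): in an inline revlog, each revision's data
    # is stored right after its index entry, so the physical offset of rev
    # r's data is its logical data offset plus the size of the r + 1 index
    # entries (0 through r) that precede it:
    #
    #   physical = self.start(r) + (r + 1) * self.index.entry_size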

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)
1815
1815
    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = 'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

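# Hedged sketch of the batching idea in _chunks() above: issue one
# contiguous read covering the whole revision range, then slice each
# revision's chunk out of the shared buffer instead of reading per
# revision. `read`, `start` and `length` are hypothetical stand-ins
# for the file read and for self.start/self.length.
def batched_chunks_sketch(read, start, length, revs):
    offset = start(revs[0])
    data = read(offset, start(revs[-1]) + length(revs[-1]) - offset)
    return [
        bytes(memoryview(data)[start(r) - offset:start(r) - offset + length(r)])
        for r in revs
    ]
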
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

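# Pure-function paraphrase (a sketch, not part of revlog.py) of the
# sparse-revlog recursion above: a revision is a snapshot when its
# delta base is not one of its parents, recursively down to a full
# text. Index rows are modeled as tuples whose slots 3, 5 and 6 hold
# the delta base and the two parent revisions, matching the code.
def issnapshot_sketch(index, rev, nullrev=-1):
    if rev == nullrev:
        return True
    base = index[rev][3]
    if base == rev or base == nullrev:
        return True  # a full text: the ultimate snapshot
    p1, p2 = index[rev][5], index[rev][6]
    if base in (p1, p2):
        return False  # a delta against a parent: an ordinary delta
    return issnapshot_sketch(index, base, nullrev)
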
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written
        to revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

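# Hedged sketch of applying one binary delta of the kind revdiff()
# returns (the ">lll" framing is the same one the censored-base check
# in addgroup() below relies on): a sequence of (start, end, newlength)
# headers, each followed by the bytes replacing base[start:end].
import struct

def apply_binary_delta_sketch(base, delta):
    out = []
    last = 0
    pos = 0
    hlen = struct.calcsize(b">lll")
    while pos < len(delta):
        start, end, newlen = struct.unpack(b">lll", delta[pos:pos + hlen])
        pos += hlen
        out.append(base[last:start])  # unchanged bytes before the hunk
        out.append(delta[pos:pos + newlen])  # replacement bytes
        pos += newlen
        last = end
    out.append(base[last:])  # unchanged tail
    return b"".join(out)
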
    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more
        advanced mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.hassidedata:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flag for this revision
        # (usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        comp_segment = self._getsegment(sidedata_offset, sidedata_size)
        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = 'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

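# Hedged sketch of what storageutil.hashrevisionsha1() computes: the
# SHA-1 of both parent nodes, ordered so the result is independent of
# parent order, followed by the revision text.
import hashlib

def hashrevision_sketch(text, p1, p2):
    s = hashlib.sha1(min(p1, p2))
    s.update(max(p1, p2))
    s.update(text)
    return s.digest()
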
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if troffset <= self.start(r) + r * self.index.entry_size:
                        trindex = r
                new_dfh.flush()

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()

                # There is a small transactional race here. If the rename of
                # the index fails, we should remove the datafile. It is more
                # important to ensure that the data file is not truncated
                # when the index is replaced as otherwise data is lost.
                tr.replace(self._datafile, self.start(trindex))

                # the temp file replaces the real index when we exit the
                # context manager

            tr.replace(self._indexfile, trindex * self.index.entry_size)
            nodemaputil.setup_persistent_nodemap(tr, self)
            self._chunkclear()

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh)
                new_dfh = None
        finally:
            if new_dfh is not None:
                new_dfh.close()

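# Schematic sketch (not part of revlog.py) of the layout change the
# conversion above performs: an inline revlog interleaves each
# fixed-size index entry with that revision's variable-length data in
# a single .i file; splitting copies the data parts into a .d stream
# and keeps only the entries, with FLAG_INLINE_DATA cleared. `lengths`
# is a hypothetical list of each revision's on-disk chunk size.
def split_inline_sketch(inline_bytes, entry_size, lengths):
    index_parts, data_parts, pos = [], [], 0
    for length in lengths:
        index_parts.append(inline_bytes[pos:pos + entry_size])
        pos += entry_size
        data_parts.append(inline_bytes[pos:pos + length])
        pos += length
    return b''.join(index_parts), b''.join(data_parts)  # (.i, .d)
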
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            r = len(self)
            dsize = 0
            if r:
                dsize = self.end(r - 1)
            dfh = None
            if not self._inline:
                try:
                    dfh = self._datafp(b"r+")
                    if self._docket is None:
                        dfh.seek(0, os.SEEK_END)
                    else:
                        dfh.seek(self._docket.data_end, os.SEEK_SET)
                except IOError as inst:
                    if inst.errno != errno.ENOENT:
                        raise
                    dfh = self._datafp(b"w+")
                transaction.add(self._datafile, dsize)
            try:
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                try:
                    self._writinghandles = (ifh, dfh)
                    try:
                        yield
                        if self._docket is not None:
                            self._write_docket(transaction)
                    finally:
                        self._writinghandles = None
                finally:
                    ifh.close()
            finally:
                if dfh is not None:
                    dfh.close()

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic.

        We could also imagine using the same transaction logic for all
        revlogs, since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support it")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

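# Companion sketch for compress() above, zlib-only and with the
# engine's keep-it-if-smaller policy made explicit (an assumption
# about what self._compressor does internally): store the compressed
# stream when it wins, else b'u' + raw text, or the text verbatim when
# it already starts with b'\0'.
import zlib

def compress_sketch(data):
    if not data:
        return b'', data
    compressed = zlib.compress(data)
    if len(compressed) < len(data):
        return b'', compressed  # zlib streams begin with b'x'
    if data[0:1] == b'\0':
        return b'', data
    return b'u', data
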
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

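# Minimal sketch of the header-byte dispatch decompress() implements:
# b'x' marks a zlib stream, b'u' marks stored-uncompressed text (strip
# the marker), b'\0' marks raw data kept verbatim, and any other byte
# selects a pluggable engine (handled by _get_decompressor in the real
# code). This toy version covers the built-in cases only.
import zlib

def decompress_sketch(data):
    if not data:
        return data
    t = data[0:1]
    if t == b'x':
        return zlib.decompress(data)
    if t == b'\0':
        return data
    if t == b'u':
        return data[1:]
    raise ValueError('unknown compression header %r' % t)
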
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh = self._writinghandles
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            h, d = deltainfo.data
            if not h and not d:
                # no data to store at all... declare it uncompressed
                compression_mode = COMP_MODE_PLAIN
            elif not h:
                t = d[0:1]
                if t == b'\0':
                    compression_mode = COMP_MODE_PLAIN
                elif t == self._docket.default_compression_header:
                    compression_mode = COMP_MODE_DEFAULT
            elif h == b'u':
                # we have a more efficient way to declare uncompressed
                h = b''
                compression_mode = COMP_MODE_PLAIN
                deltainfo = deltautil.drop_u_compression(deltainfo)

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
            compression_mode,
            sidedata_compression_mode,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

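# Sketch of the packing behind offset_type() used in the index entry
# above, and its inverse (the ">> 16" seen in _getsegmentforrevs): the
# data file offset and the 16-bit revision flags share one integer.
def offset_type_sketch(offset, flags):
    assert 0 <= flags < (1 << 16)
    return (offset << 16) | flags

def unpack_offset_flags_sketch(field):
    return field >> 16, field & 0xFFFF  # (data offset, flags)
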
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), while revlog v2
        needs a docket file to store that information: since sidedata can be
        rewritten to the end of the data file within a transaction, you can
        have cases where, for example, rev `n` does not have sidedata while
        rev `n - 1` does, leading to `n - 1`'s sidedata being written after
        `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(self, transaction, entry, data, link, offset, sidedata):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = self._writinghandles[0].tell()
            self._docket.data_end = self._writinghandles[1].tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement
                        # in a single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
2769 # We're only using addgroup() in the context of changegroup
2776 # We're only using addgroup() in the context of changegroup
2770 # generation so the revision data can always be handled as raw
2777 # generation so the revision data can always be handled as raw
2771 # by the flagprocessor.
2778 # by the flagprocessor.
2772 rev = self._addrevision(
2779 rev = self._addrevision(
2773 node,
2780 node,
2774 None,
2781 None,
2775 transaction,
2782 transaction,
2776 link,
2783 link,
2777 p1,
2784 p1,
2778 p2,
2785 p2,
2779 flags,
2786 flags,
2780 (baserev, delta),
2787 (baserev, delta),
2781 alwayscache=alwayscache,
2788 alwayscache=alwayscache,
2782 deltacomputer=deltacomputer,
2789 deltacomputer=deltacomputer,
2783 sidedata=sidedata,
2790 sidedata=sidedata,
2784 )
2791 )
2785
2792
2786 if addrevisioncb:
2793 if addrevisioncb:
2787 addrevisioncb(self, rev)
2794 addrevisioncb(self, rev)
2788 empty = False
2795 empty = False
2789 finally:
2796 finally:
2790 self._adding_group = False
2797 self._adding_group = False
2791 return not empty
2798 return not empty
2792
2799
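For orientation, each item consumed from ``deltas`` above is the 8-tuple unpacked at the top of the loop. A minimal sketch of driving addgroup() by hand, assuming a hypothetical revlog ``rl``, changelog ``cl`` and open transaction ``tr``:

    # each entry: (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    deltas = iter([(node, p1, p2, linknode, deltabase, delta, 0, {})])
    # the linkmapper turns the changelog node into a local linkrev
    added = rl.addgroup(deltas, cl.rev, tr)
    # False only when the incoming group contained no deltas at all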
2793 def iscensored(self, rev):
2800 def iscensored(self, rev):
2794 """Check if a file revision is censored."""
2801 """Check if a file revision is censored."""
2795 if not self._censorable:
2802 if not self._censorable:
2796 return False
2803 return False
2797
2804
2798 return self.flags(rev) & REVIDX_ISCENSORED
2805 return self.flags(rev) & REVIDX_ISCENSORED
2799
2806
2800 def _peek_iscensored(self, baserev, delta):
2807 def _peek_iscensored(self, baserev, delta):
2801 """Quickly check if a delta produces a censored revision."""
2808 """Quickly check if a delta produces a censored revision."""
2802 if not self._censorable:
2809 if not self._censorable:
2803 return False
2810 return False
2804
2811
2805 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2812 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2806
2813
2807 def getstrippoint(self, minlink):
2814 def getstrippoint(self, minlink):
2808 """find the minimum rev that must be stripped to strip the linkrev
2815 """find the minimum rev that must be stripped to strip the linkrev
2809
2816
2810 Returns a tuple containing the minimum rev and a set of all revs that
2817 Returns a tuple containing the minimum rev and a set of all revs that
2811 have linkrevs that will be broken by this strip.
2818 have linkrevs that will be broken by this strip.
2812 """
2819 """
2813 return storageutil.resolvestripinfo(
2820 return storageutil.resolvestripinfo(
2814 minlink,
2821 minlink,
2815 len(self) - 1,
2822 len(self) - 1,
2816 self.headrevs(),
2823 self.headrevs(),
2817 self.linkrev,
2824 self.linkrev,
2818 self.parentrevs,
2825 self.parentrevs,
2819 )
2826 )
2820
2827
2821 def strip(self, minlink, transaction):
2828 def strip(self, minlink, transaction):
2822 """truncate the revlog on the first revision with a linkrev >= minlink
2829 """truncate the revlog on the first revision with a linkrev >= minlink
2823
2830
2824 This function is called when we're stripping revision minlink and
2831 This function is called when we're stripping revision minlink and
2825 its descendants from the repository.
2832 its descendants from the repository.
2826
2833
2827 We have to remove all revisions with linkrev >= minlink, because
2834 We have to remove all revisions with linkrev >= minlink, because
2828 the equivalent changelog revisions will be renumbered after the
2835 the equivalent changelog revisions will be renumbered after the
2829 strip.
2836 strip.
2830
2837
2831 So we truncate the revlog on the first of these revisions, and
2838 So we truncate the revlog on the first of these revisions, and
2832 trust that the caller has saved the revisions that shouldn't be
2839 trust that the caller has saved the revisions that shouldn't be
2833 removed and that it'll re-add them after this truncation.
2840 removed and that it'll re-add them after this truncation.
2834 """
2841 """
2835 if len(self) == 0:
2842 if len(self) == 0:
2836 return
2843 return
2837
2844
2838 rev, _ = self.getstrippoint(minlink)
2845 rev, _ = self.getstrippoint(minlink)
2839 if rev == len(self):
2846 if rev == len(self):
2840 return
2847 return
2841
2848
2842 # first truncate the files on disk
2849 # first truncate the files on disk
2843 data_end = self.start(rev)
2850 data_end = self.start(rev)
2844 if not self._inline:
2851 if not self._inline:
2845 transaction.add(self._datafile, data_end)
2852 transaction.add(self._datafile, data_end)
2846 end = rev * self.index.entry_size
2853 end = rev * self.index.entry_size
2847 else:
2854 else:
2848 end = data_end + (rev * self.index.entry_size)
2855 end = data_end + (rev * self.index.entry_size)
2849
2856
2850 transaction.add(self._indexfile, end)
2857 transaction.add(self._indexfile, end)
2851 if self._docket is not None:
2858 if self._docket is not None:
2852 # XXX we could leverage the docket while stripping. However it is
2859 # XXX we could leverage the docket while stripping. However it is
2853 # not powerful enough at the time of this comment
2860 # not powerful enough at the time of this comment
2854 self._docket.index_end = end
2861 self._docket.index_end = end
2855 self._docket.data_end = data_end
2862 self._docket.data_end = data_end
2856 self._docket.write(transaction, stripping=True)
2863 self._docket.write(transaction, stripping=True)
2857
2864
2858 # then reset internal state in memory to forget those revisions
2865 # then reset internal state in memory to forget those revisions
2859 self._revisioncache = None
2866 self._revisioncache = None
2860 self._chaininfocache = util.lrucachedict(500)
2867 self._chaininfocache = util.lrucachedict(500)
2861 self._chunkclear()
2868 self._chunkclear()
2862
2869
2863 del self.index[rev:-1]
2870 del self.index[rev:-1]
2864
2871
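The truncation offsets computed above differ by layout: a split revlog truncates the index at ``rev * entry_size`` and the data file at ``start(rev)``, while an inline revlog interleaves both streams in one file and truncates at their sum. A worked sketch with hypothetical numbers:

    entry_size = 64          # size of one index record
    data_end = 5000          # self.start(rev) for rev = 10
    split_index_end = 10 * entry_size            # 640 bytes kept in .i
    inline_end = data_end + 10 * entry_size      # 5640 bytes kept in the one file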
2865 def checksize(self):
2872 def checksize(self):
2866 """Check size of index and data files
2873 """Check size of index and data files
2867
2874
2868 return a (dd, di) tuple.
2875 return a (dd, di) tuple.
2869 - dd: extra bytes for the "data" file
2876 - dd: extra bytes for the "data" file
2870 - di: extra bytes for the "index" file
2877 - di: extra bytes for the "index" file
2871
2878
2872 A healthy revlog will return (0, 0).
2879 A healthy revlog will return (0, 0).
2873 """
2880 """
2874 expected = 0
2881 expected = 0
2875 if len(self):
2882 if len(self):
2876 expected = max(0, self.end(len(self) - 1))
2883 expected = max(0, self.end(len(self) - 1))
2877
2884
2878 try:
2885 try:
2879 with self._datafp() as f:
2886 with self._datafp() as f:
2880 f.seek(0, io.SEEK_END)
2887 f.seek(0, io.SEEK_END)
2881 actual = f.tell()
2888 actual = f.tell()
2882 dd = actual - expected
2889 dd = actual - expected
2883 except IOError as inst:
2890 except IOError as inst:
2884 if inst.errno != errno.ENOENT:
2891 if inst.errno != errno.ENOENT:
2885 raise
2892 raise
2886 dd = 0
2893 dd = 0
2887
2894
2888 try:
2895 try:
2889 f = self.opener(self._indexfile)
2896 f = self.opener(self._indexfile)
2890 f.seek(0, io.SEEK_END)
2897 f.seek(0, io.SEEK_END)
2891 actual = f.tell()
2898 actual = f.tell()
2892 f.close()
2899 f.close()
2893 s = self.index.entry_size
2900 s = self.index.entry_size
2894 i = max(0, actual // s)
2901 i = max(0, actual // s)
2895 di = actual - (i * s)
2902 di = actual - (i * s)
2896 if self._inline:
2903 if self._inline:
2897 databytes = 0
2904 databytes = 0
2898 for r in self:
2905 for r in self:
2899 databytes += max(0, self.length(r))
2906 databytes += max(0, self.length(r))
2900 dd = 0
2907 dd = 0
2901 di = actual - len(self) * s - databytes
2908 di = actual - len(self) * s - databytes
2902 except IOError as inst:
2909 except IOError as inst:
2903 if inst.errno != errno.ENOENT:
2910 if inst.errno != errno.ENOENT:
2904 raise
2911 raise
2905 di = 0
2912 di = 0
2906
2913
2907 return (dd, di)
2914 return (dd, di)
2908
2915
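Because a healthy revlog reports ``(0, 0)``, any positive component signals trailing bytes left behind by an interrupted append. A hedged consumer sketch (hypothetical revlog ``rl``):

    dd, di = rl.checksize()
    if dd or di:
        # dd: stray bytes past the expected end of the data file
        # di: stray bytes past the last complete index entry
        print('revlog damaged: %d extra data bytes, %d extra index bytes' % (dd, di))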
2909 def files(self):
2916 def files(self):
2910 res = [self._indexfile]
2917 res = [self._indexfile]
2911 if not self._inline:
2918 if not self._inline:
2912 res.append(self._datafile)
2919 res.append(self._datafile)
2913 return res
2920 return res
2914
2921
2915 def emitrevisions(
2922 def emitrevisions(
2916 self,
2923 self,
2917 nodes,
2924 nodes,
2918 nodesorder=None,
2925 nodesorder=None,
2919 revisiondata=False,
2926 revisiondata=False,
2920 assumehaveparentrevisions=False,
2927 assumehaveparentrevisions=False,
2921 deltamode=repository.CG_DELTAMODE_STD,
2928 deltamode=repository.CG_DELTAMODE_STD,
2922 sidedata_helpers=None,
2929 sidedata_helpers=None,
2923 ):
2930 ):
2924 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2931 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2925 raise error.ProgrammingError(
2932 raise error.ProgrammingError(
2926 b'unhandled value for nodesorder: %s' % nodesorder
2933 b'unhandled value for nodesorder: %s' % nodesorder
2927 )
2934 )
2928
2935
2929 if nodesorder is None and not self._generaldelta:
2936 if nodesorder is None and not self._generaldelta:
2930 nodesorder = b'storage'
2937 nodesorder = b'storage'
2931
2938
2932 if (
2939 if (
2933 not self._storedeltachains
2940 not self._storedeltachains
2934 and deltamode != repository.CG_DELTAMODE_PREV
2941 and deltamode != repository.CG_DELTAMODE_PREV
2935 ):
2942 ):
2936 deltamode = repository.CG_DELTAMODE_FULL
2943 deltamode = repository.CG_DELTAMODE_FULL
2937
2944
2938 return storageutil.emitrevisions(
2945 return storageutil.emitrevisions(
2939 self,
2946 self,
2940 nodes,
2947 nodes,
2941 nodesorder,
2948 nodesorder,
2942 revlogrevisiondelta,
2949 revlogrevisiondelta,
2943 deltaparentfn=self.deltaparent,
2950 deltaparentfn=self.deltaparent,
2944 candeltafn=self.candelta,
2951 candeltafn=self.candelta,
2945 rawsizefn=self.rawsize,
2952 rawsizefn=self.rawsize,
2946 revdifffn=self.revdiff,
2953 revdifffn=self.revdiff,
2947 flagsfn=self.flags,
2954 flagsfn=self.flags,
2948 deltamode=deltamode,
2955 deltamode=deltamode,
2949 revisiondata=revisiondata,
2956 revisiondata=revisiondata,
2950 assumehaveparentrevisions=assumehaveparentrevisions,
2957 assumehaveparentrevisions=assumehaveparentrevisions,
2951 sidedata_helpers=sidedata_helpers,
2958 sidedata_helpers=sidedata_helpers,
2952 )
2959 )
2953
2960
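A typical call asks for full revision data in storage order; this is only one valid combination and the names below are assumed (``rl`` revlog, ``nodes`` known nodes):

    for rdelta in rl.emitrevisions(
        nodes,
        nodesorder=b'storage',
        revisiondata=True,
        deltamode=repository.CG_DELTAMODE_FULL,
    ):
        pass  # each item carries node, parents and either a delta or a full text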
2954 DELTAREUSEALWAYS = b'always'
2961 DELTAREUSEALWAYS = b'always'
2955 DELTAREUSESAMEREVS = b'samerevs'
2962 DELTAREUSESAMEREVS = b'samerevs'
2956 DELTAREUSENEVER = b'never'
2963 DELTAREUSENEVER = b'never'
2957
2964
2958 DELTAREUSEFULLADD = b'fulladd'
2965 DELTAREUSEFULLADD = b'fulladd'
2959
2966
2960 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2967 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2961
2968
2962 def clone(
2969 def clone(
2963 self,
2970 self,
2964 tr,
2971 tr,
2965 destrevlog,
2972 destrevlog,
2966 addrevisioncb=None,
2973 addrevisioncb=None,
2967 deltareuse=DELTAREUSESAMEREVS,
2974 deltareuse=DELTAREUSESAMEREVS,
2968 forcedeltabothparents=None,
2975 forcedeltabothparents=None,
2969 sidedata_helpers=None,
2976 sidedata_helpers=None,
2970 ):
2977 ):
2971 """Copy this revlog to another, possibly with format changes.
2978 """Copy this revlog to another, possibly with format changes.
2972
2979
2973 The destination revlog will contain the same revisions and nodes.
2980 The destination revlog will contain the same revisions and nodes.
2974 However, it may not be bit-for-bit identical due to e.g. delta encoding
2981 However, it may not be bit-for-bit identical due to e.g. delta encoding
2975 differences.
2982 differences.
2976
2983
2977 The ``deltareuse`` argument controls how deltas from the existing revlog
2984 The ``deltareuse`` argument controls how deltas from the existing revlog
2978 are preserved in the destination revlog. The argument can have the
2985 are preserved in the destination revlog. The argument can have the
2979 following values:
2986 following values:
2980
2987
2981 DELTAREUSEALWAYS
2988 DELTAREUSEALWAYS
2982 Deltas will always be reused (if possible), even if the destination
2989 Deltas will always be reused (if possible), even if the destination
2983 revlog would not select the same revisions for the delta. This is the
2990 revlog would not select the same revisions for the delta. This is the
2984 fastest mode of operation.
2991 fastest mode of operation.
2985 DELTAREUSESAMEREVS
2992 DELTAREUSESAMEREVS
2986 Deltas will be reused if the destination revlog would pick the same
2993 Deltas will be reused if the destination revlog would pick the same
2987 revisions for the delta. This mode strikes a balance between speed
2994 revisions for the delta. This mode strikes a balance between speed
2988 and optimization.
2995 and optimization.
2989 DELTAREUSENEVER
2996 DELTAREUSENEVER
2990 Deltas will never be reused. This is the slowest mode of execution.
2997 Deltas will never be reused. This is the slowest mode of execution.
2991 This mode can be used to recompute deltas (e.g. if the diff/delta
2998 This mode can be used to recompute deltas (e.g. if the diff/delta
2992 algorithm changes).
2999 algorithm changes).
2993 DELTAREUSEFULLADD
3000 DELTAREUSEFULLADD
2994 Revisions will be re-added as if they were new content. This is
3001 Revisions will be re-added as if they were new content. This is
2995 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3002 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2996 e.g. large file detection and handling.
3003 e.g. large file detection and handling.
2997
3004
2998 Delta computation can be slow, so the choice of delta reuse policy can
3005 Delta computation can be slow, so the choice of delta reuse policy can
2999 significantly affect run time.
3006 significantly affect run time.
3000
3007
3001 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3008 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3002 two extremes. Deltas will be reused if they are appropriate. But if a
3009 two extremes. Deltas will be reused if they are appropriate. But if a
3003 better delta base is available, the delta will be recomputed. This means if you
3010 better delta base is available, the delta will be recomputed. This means if you
3004 are converting a non-generaldelta revlog to a generaldelta revlog,
3011 are converting a non-generaldelta revlog to a generaldelta revlog,
3005 deltas will be recomputed if the delta's parent isn't a parent of the
3012 deltas will be recomputed if the delta's parent isn't a parent of the
3006 revision.
3013 revision.
3007
3014
3008 In addition to the delta policy, the ``forcedeltabothparents``
3015 In addition to the delta policy, the ``forcedeltabothparents``
3009 argument controls whether to force compute deltas against both parents
3016 argument controls whether to force compute deltas against both parents
3010 for merges. If unset, the destination revlog's current setting is kept.
3017 for merges. If unset, the destination revlog's current setting is kept.
3011
3018
3012 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3019 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3013 `sidedata_helpers`.
3020 `sidedata_helpers`.
3014 """
3021 """
3015 if deltareuse not in self.DELTAREUSEALL:
3022 if deltareuse not in self.DELTAREUSEALL:
3016 raise ValueError(
3023 raise ValueError(
3017 _(b'value for deltareuse invalid: %s') % deltareuse
3024 _(b'value for deltareuse invalid: %s') % deltareuse
3018 )
3025 )
3019
3026
3020 if len(destrevlog):
3027 if len(destrevlog):
3021 raise ValueError(_(b'destination revlog is not empty'))
3028 raise ValueError(_(b'destination revlog is not empty'))
3022
3029
3023 if getattr(self, 'filteredrevs', None):
3030 if getattr(self, 'filteredrevs', None):
3024 raise ValueError(_(b'source revlog has filtered revisions'))
3031 raise ValueError(_(b'source revlog has filtered revisions'))
3025 if getattr(destrevlog, 'filteredrevs', None):
3032 if getattr(destrevlog, 'filteredrevs', None):
3026 raise ValueError(_(b'destination revlog has filtered revisions'))
3033 raise ValueError(_(b'destination revlog has filtered revisions'))
3027
3034
3028 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3035 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3029 # if possible.
3036 # if possible.
3030 oldlazydelta = destrevlog._lazydelta
3037 oldlazydelta = destrevlog._lazydelta
3031 oldlazydeltabase = destrevlog._lazydeltabase
3038 oldlazydeltabase = destrevlog._lazydeltabase
3032 oldamd = destrevlog._deltabothparents
3039 oldamd = destrevlog._deltabothparents
3033
3040
3034 try:
3041 try:
3035 if deltareuse == self.DELTAREUSEALWAYS:
3042 if deltareuse == self.DELTAREUSEALWAYS:
3036 destrevlog._lazydeltabase = True
3043 destrevlog._lazydeltabase = True
3037 destrevlog._lazydelta = True
3044 destrevlog._lazydelta = True
3038 elif deltareuse == self.DELTAREUSESAMEREVS:
3045 elif deltareuse == self.DELTAREUSESAMEREVS:
3039 destrevlog._lazydeltabase = False
3046 destrevlog._lazydeltabase = False
3040 destrevlog._lazydelta = True
3047 destrevlog._lazydelta = True
3041 elif deltareuse == self.DELTAREUSENEVER:
3048 elif deltareuse == self.DELTAREUSENEVER:
3042 destrevlog._lazydeltabase = False
3049 destrevlog._lazydeltabase = False
3043 destrevlog._lazydelta = False
3050 destrevlog._lazydelta = False
3044
3051
3045 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3052 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3046
3053
3047 self._clone(
3054 self._clone(
3048 tr,
3055 tr,
3049 destrevlog,
3056 destrevlog,
3050 addrevisioncb,
3057 addrevisioncb,
3051 deltareuse,
3058 deltareuse,
3052 forcedeltabothparents,
3059 forcedeltabothparents,
3053 sidedata_helpers,
3060 sidedata_helpers,
3054 )
3061 )
3055
3062
3056 finally:
3063 finally:
3057 destrevlog._lazydelta = oldlazydelta
3064 destrevlog._lazydelta = oldlazydelta
3058 destrevlog._lazydeltabase = oldlazydeltabase
3065 destrevlog._lazydeltabase = oldlazydeltabase
3059 destrevlog._deltabothparents = oldamd
3066 destrevlog._deltabothparents = oldamd
3060
3067
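For example, recomputing every delta during a format conversion reduces to a single call (hypothetical ``src``/``dest`` revlogs sharing a transaction ``tr``):

    src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)

The lazydelta knobs on the destination are only flipped for the duration of the copy; the ``finally`` block above restores them, so a failed clone does not leave the destination misconfigured.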
3061 def _clone(
3068 def _clone(
3062 self,
3069 self,
3063 tr,
3070 tr,
3064 destrevlog,
3071 destrevlog,
3065 addrevisioncb,
3072 addrevisioncb,
3066 deltareuse,
3073 deltareuse,
3067 forcedeltabothparents,
3074 forcedeltabothparents,
3068 sidedata_helpers,
3075 sidedata_helpers,
3069 ):
3076 ):
3070 """perform the core duty of `revlog.clone` after parameter processing"""
3077 """perform the core duty of `revlog.clone` after parameter processing"""
3071 deltacomputer = deltautil.deltacomputer(destrevlog)
3078 deltacomputer = deltautil.deltacomputer(destrevlog)
3072 index = self.index
3079 index = self.index
3073 for rev in self:
3080 for rev in self:
3074 entry = index[rev]
3081 entry = index[rev]
3075
3082
3076 # Some classes override linkrev to take filtered revs into
3083 # Some classes override linkrev to take filtered revs into
3077 # account. Use raw entry from index.
3084 # account. Use raw entry from index.
3078 flags = entry[0] & 0xFFFF
3085 flags = entry[0] & 0xFFFF
3079 linkrev = entry[4]
3086 linkrev = entry[4]
3080 p1 = index[entry[5]][7]
3087 p1 = index[entry[5]][7]
3081 p2 = index[entry[6]][7]
3088 p2 = index[entry[6]][7]
3082 node = entry[7]
3089 node = entry[7]
3083
3090
3084 # (Possibly) reuse the delta from the revlog if allowed and
3091 # (Possibly) reuse the delta from the revlog if allowed and
3085 # the revlog chunk is a delta.
3092 # the revlog chunk is a delta.
3086 cachedelta = None
3093 cachedelta = None
3087 rawtext = None
3094 rawtext = None
3088 if deltareuse == self.DELTAREUSEFULLADD:
3095 if deltareuse == self.DELTAREUSEFULLADD:
3089 text, sidedata = self._revisiondata(rev)
3096 text, sidedata = self._revisiondata(rev)
3090
3097
3091 if sidedata_helpers is not None:
3098 if sidedata_helpers is not None:
3092 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3099 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3093 self, sidedata_helpers, sidedata, rev
3100 self, sidedata_helpers, sidedata, rev
3094 )
3101 )
3095 flags = flags | new_flags[0] & ~new_flags[1]
3102 flags = flags | new_flags[0] & ~new_flags[1]
3096
3103
3097 destrevlog.addrevision(
3104 destrevlog.addrevision(
3098 text,
3105 text,
3099 tr,
3106 tr,
3100 linkrev,
3107 linkrev,
3101 p1,
3108 p1,
3102 p2,
3109 p2,
3103 cachedelta=cachedelta,
3110 cachedelta=cachedelta,
3104 node=node,
3111 node=node,
3105 flags=flags,
3112 flags=flags,
3106 deltacomputer=deltacomputer,
3113 deltacomputer=deltacomputer,
3107 sidedata=sidedata,
3114 sidedata=sidedata,
3108 )
3115 )
3109 else:
3116 else:
3110 if destrevlog._lazydelta:
3117 if destrevlog._lazydelta:
3111 dp = self.deltaparent(rev)
3118 dp = self.deltaparent(rev)
3112 if dp != nullrev:
3119 if dp != nullrev:
3113 cachedelta = (dp, bytes(self._chunk(rev)))
3120 cachedelta = (dp, bytes(self._chunk(rev)))
3114
3121
3115 sidedata = None
3122 sidedata = None
3116 if not cachedelta:
3123 if not cachedelta:
3117 rawtext, sidedata = self._revisiondata(rev)
3124 rawtext, sidedata = self._revisiondata(rev)
3118 if sidedata is None:
3125 if sidedata is None:
3119 sidedata = self.sidedata(rev)
3126 sidedata = self.sidedata(rev)
3120
3127
3121 if sidedata_helpers is not None:
3128 if sidedata_helpers is not None:
3122 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3129 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3123 self, sidedata_helpers, sidedata, rev
3130 self, sidedata_helpers, sidedata, rev
3124 )
3131 )
3125 flags = flags | new_flags[0] & ~new_flags[1]
3132 flags = flags | new_flags[0] & ~new_flags[1]
3126
3133
3127 with destrevlog._writing(tr):
3134 with destrevlog._writing(tr):
3128 destrevlog._addrevision(
3135 destrevlog._addrevision(
3129 node,
3136 node,
3130 rawtext,
3137 rawtext,
3131 tr,
3138 tr,
3132 linkrev,
3139 linkrev,
3133 p1,
3140 p1,
3134 p2,
3141 p2,
3135 flags,
3142 flags,
3136 cachedelta,
3143 cachedelta,
3137 deltacomputer=deltacomputer,
3144 deltacomputer=deltacomputer,
3138 sidedata=sidedata,
3145 sidedata=sidedata,
3139 )
3146 )
3140
3147
3141 if addrevisioncb:
3148 if addrevisioncb:
3142 addrevisioncb(self, rev, node)
3149 addrevisioncb(self, rev, node)
3143
3150
3144 def censorrevision(self, tr, censornode, tombstone=b''):
3151 def censorrevision(self, tr, censornode, tombstone=b''):
3145 if self._format_version == REVLOGV0:
3152 if self._format_version == REVLOGV0:
3146 raise error.RevlogError(
3153 raise error.RevlogError(
3147 _(b'cannot censor with version %d revlogs')
3154 _(b'cannot censor with version %d revlogs')
3148 % self._format_version
3155 % self._format_version
3149 )
3156 )
3150
3157
3151 censorrev = self.rev(censornode)
3158 censorrev = self.rev(censornode)
3152 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
3159 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
3153
3160
3154 if len(tombstone) > self.rawsize(censorrev):
3161 if len(tombstone) > self.rawsize(censorrev):
3155 raise error.Abort(
3162 raise error.Abort(
3156 _(b'censor tombstone must be no longer than censored data')
3163 _(b'censor tombstone must be no longer than censored data')
3157 )
3164 )
3158
3165
3159 # Rewriting the revlog in place is hard. Our strategy for censoring is
3166 # Rewriting the revlog in place is hard. Our strategy for censoring is
3160 # to create a new revlog, copy all revisions to it, then replace the
3167 # to create a new revlog, copy all revisions to it, then replace the
3161 # revlogs on transaction close.
3168 # revlogs on transaction close.
3162 #
3169 #
3163 # This is a bit dangerous. We could easily have a mismatch of state.
3170 # This is a bit dangerous. We could easily have a mismatch of state.
3164 newrl = revlog(
3171 newrl = revlog(
3165 self.opener,
3172 self.opener,
3166 target=self.target,
3173 target=self.target,
3167 radix=self.radix,
3174 radix=self.radix,
3168 postfix=b'tmpcensored',
3175 postfix=b'tmpcensored',
3169 censorable=True,
3176 censorable=True,
3170 )
3177 )
3171 newrl._format_version = self._format_version
3178 newrl._format_version = self._format_version
3172 newrl._format_flags = self._format_flags
3179 newrl._format_flags = self._format_flags
3173 newrl._generaldelta = self._generaldelta
3180 newrl._generaldelta = self._generaldelta
3174 newrl._parse_index = self._parse_index
3181 newrl._parse_index = self._parse_index
3175
3182
3176 for rev in self.revs():
3183 for rev in self.revs():
3177 node = self.node(rev)
3184 node = self.node(rev)
3178 p1, p2 = self.parents(node)
3185 p1, p2 = self.parents(node)
3179
3186
3180 if rev == censorrev:
3187 if rev == censorrev:
3181 newrl.addrawrevision(
3188 newrl.addrawrevision(
3182 tombstone,
3189 tombstone,
3183 tr,
3190 tr,
3184 self.linkrev(censorrev),
3191 self.linkrev(censorrev),
3185 p1,
3192 p1,
3186 p2,
3193 p2,
3187 censornode,
3194 censornode,
3188 REVIDX_ISCENSORED,
3195 REVIDX_ISCENSORED,
3189 )
3196 )
3190
3197
3191 if newrl.deltaparent(rev) != nullrev:
3198 if newrl.deltaparent(rev) != nullrev:
3192 raise error.Abort(
3199 raise error.Abort(
3193 _(
3200 _(
3194 b'censored revision stored as delta; '
3201 b'censored revision stored as delta; '
3195 b'cannot censor'
3202 b'cannot censor'
3196 ),
3203 ),
3197 hint=_(
3204 hint=_(
3198 b'censoring of revlogs is not '
3205 b'censoring of revlogs is not '
3199 b'fully implemented; please report '
3206 b'fully implemented; please report '
3200 b'this bug'
3207 b'this bug'
3201 ),
3208 ),
3202 )
3209 )
3203 continue
3210 continue
3204
3211
3205 if self.iscensored(rev):
3212 if self.iscensored(rev):
3206 if self.deltaparent(rev) != nullrev:
3213 if self.deltaparent(rev) != nullrev:
3207 raise error.Abort(
3214 raise error.Abort(
3208 _(
3215 _(
3209 b'cannot censor due to censored '
3216 b'cannot censor due to censored '
3210 b'revision having delta stored'
3217 b'revision having delta stored'
3211 )
3218 )
3212 )
3219 )
3213 rawtext = self._chunk(rev)
3220 rawtext = self._chunk(rev)
3214 else:
3221 else:
3215 rawtext = self.rawdata(rev)
3222 rawtext = self.rawdata(rev)
3216
3223
3217 newrl.addrawrevision(
3224 newrl.addrawrevision(
3218 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3225 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3219 )
3226 )
3220
3227
3221 tr.addbackup(self._indexfile, location=b'store')
3228 tr.addbackup(self._indexfile, location=b'store')
3222 if not self._inline:
3229 if not self._inline:
3223 tr.addbackup(self._datafile, location=b'store')
3230 tr.addbackup(self._datafile, location=b'store')
3224
3231
3225 self.opener.rename(newrl._indexfile, self._indexfile)
3232 self.opener.rename(newrl._indexfile, self._indexfile)
3226 if not self._inline:
3233 if not self._inline:
3227 self.opener.rename(newrl._datafile, self._datafile)
3234 self.opener.rename(newrl._datafile, self._datafile)
3228
3235
3229 self.clearcaches()
3236 self.clearcaches()
3230 self._loadindex()
3237 self._loadindex()
3231
3238
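A call is a single statement inside an open transaction (hypothetical filelog ``fl`` and node ``badnode``); the tombstone must not exceed the raw size of the revision it replaces, as enforced above:

    fl.censorrevision(tr, badnode, tombstone=b'removed for legal reasons')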
3232 def verifyintegrity(self, state):
3239 def verifyintegrity(self, state):
3233 """Verifies the integrity of the revlog.
3240 """Verifies the integrity of the revlog.
3234
3241
3235 Yields ``revlogproblem`` instances describing problems that are
3242 Yields ``revlogproblem`` instances describing problems that are
3236 found.
3243 found.
3237 """
3244 """
3238 dd, di = self.checksize()
3245 dd, di = self.checksize()
3239 if dd:
3246 if dd:
3240 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3247 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3241 if di:
3248 if di:
3242 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3249 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3243
3250
3244 version = self._format_version
3251 version = self._format_version
3245
3252
3246 # The verifier tells us what version revlog we should be.
3253 # The verifier tells us what version revlog we should be.
3247 if version != state[b'expectedversion']:
3254 if version != state[b'expectedversion']:
3248 yield revlogproblem(
3255 yield revlogproblem(
3249 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3256 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3250 % (self.display_id, version, state[b'expectedversion'])
3257 % (self.display_id, version, state[b'expectedversion'])
3251 )
3258 )
3252
3259
3253 state[b'skipread'] = set()
3260 state[b'skipread'] = set()
3254 state[b'safe_renamed'] = set()
3261 state[b'safe_renamed'] = set()
3255
3262
3256 for rev in self:
3263 for rev in self:
3257 node = self.node(rev)
3264 node = self.node(rev)
3258
3265
3259 # Verify contents. 4 cases to care about:
3266 # Verify contents. 4 cases to care about:
3260 #
3267 #
3261 # common: the most common case
3268 # common: the most common case
3262 # rename: with a rename
3269 # rename: with a rename
3263 # meta: file content starts with b'\1\n', the metadata
3270 # meta: file content starts with b'\1\n', the metadata
3264 # header defined in filelog.py, but without a rename
3271 # header defined in filelog.py, but without a rename
3265 # ext: content stored externally
3272 # ext: content stored externally
3266 #
3273 #
3267 # More formally, their differences are shown below:
3274 # More formally, their differences are shown below:
3268 #
3275 #
3269 # | common | rename | meta | ext
3276 # | common | rename | meta | ext
3270 # -------------------------------------------------------
3277 # -------------------------------------------------------
3271 # flags() | 0 | 0 | 0 | not 0
3278 # flags() | 0 | 0 | 0 | not 0
3272 # renamed() | False | True | False | ?
3279 # renamed() | False | True | False | ?
3273 # rawtext[0:2]=='\1\n'| False | True | True | ?
3280 # rawtext[0:2]=='\1\n'| False | True | True | ?
3274 #
3281 #
3275 # "rawtext" means the raw text stored in revlog data, which
3282 # "rawtext" means the raw text stored in revlog data, which
3276 # could be retrieved by "rawdata(rev)". "text"
3283 # could be retrieved by "rawdata(rev)". "text"
3277 # mentioned below is "revision(rev)".
3284 # mentioned below is "revision(rev)".
3278 #
3285 #
3279 # There are 3 different lengths stored physically:
3286 # There are 3 different lengths stored physically:
3280 # 1. L1: rawsize, stored in revlog index
3287 # 1. L1: rawsize, stored in revlog index
3281 # 2. L2: len(rawtext), stored in revlog data
3288 # 2. L2: len(rawtext), stored in revlog data
3282 # 3. L3: len(text), stored in revlog data if flags==0, or
3289 # 3. L3: len(text), stored in revlog data if flags==0, or
3283 # possibly somewhere else if flags!=0
3290 # possibly somewhere else if flags!=0
3284 #
3291 #
3285 # L1 should be equal to L2. L3 could be different from them.
3292 # L1 should be equal to L2. L3 could be different from them.
3286 # "text" may or may not affect commit hash depending on flag
3293 # "text" may or may not affect commit hash depending on flag
3287 # processors (see flagutil.addflagprocessor).
3294 # processors (see flagutil.addflagprocessor).
3288 #
3295 #
3289 # | common | rename | meta | ext
3296 # | common | rename | meta | ext
3290 # -------------------------------------------------
3297 # -------------------------------------------------
3291 # rawsize() | L1 | L1 | L1 | L1
3298 # rawsize() | L1 | L1 | L1 | L1
3292 # size() | L1 | L2-LM | L1(*) | L1 (?)
3299 # size() | L1 | L2-LM | L1(*) | L1 (?)
3293 # len(rawtext) | L2 | L2 | L2 | L2
3300 # len(rawtext) | L2 | L2 | L2 | L2
3294 # len(text) | L2 | L2 | L2 | L3
3301 # len(text) | L2 | L2 | L2 | L3
3295 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3302 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3296 #
3303 #
3297 # LM: length of metadata, depending on rawtext
3304 # LM: length of metadata, depending on rawtext
3298 # (*): not ideal, see comment in filelog.size
3305 # (*): not ideal, see comment in filelog.size
3299 # (?): could be "- len(meta)" if the resolved content has
3306 # (?): could be "- len(meta)" if the resolved content has
3300 # rename metadata
3307 # rename metadata
3301 #
3308 #
3302 # Checks needed to be done:
3309 # Checks needed to be done:
3303 # 1. length check: L1 == L2, in all cases.
3310 # 1. length check: L1 == L2, in all cases.
3304 # 2. hash check: depending on flag processor, we may need to
3311 # 2. hash check: depending on flag processor, we may need to
3305 # use either "text" (external), or "rawtext" (in revlog).
3312 # use either "text" (external), or "rawtext" (in revlog).
3306
3313
3307 try:
3314 try:
3308 skipflags = state.get(b'skipflags', 0)
3315 skipflags = state.get(b'skipflags', 0)
3309 if skipflags:
3316 if skipflags:
3310 skipflags &= self.flags(rev)
3317 skipflags &= self.flags(rev)
3311
3318
3312 _verify_revision(self, skipflags, state, node)
3319 _verify_revision(self, skipflags, state, node)
3313
3320
3314 l1 = self.rawsize(rev)
3321 l1 = self.rawsize(rev)
3315 l2 = len(self.rawdata(node))
3322 l2 = len(self.rawdata(node))
3316
3323
3317 if l1 != l2:
3324 if l1 != l2:
3318 yield revlogproblem(
3325 yield revlogproblem(
3319 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3326 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3320 node=node,
3327 node=node,
3321 )
3328 )
3322
3329
3323 except error.CensoredNodeError:
3330 except error.CensoredNodeError:
3324 if state[b'erroroncensored']:
3331 if state[b'erroroncensored']:
3325 yield revlogproblem(
3332 yield revlogproblem(
3326 error=_(b'censored file data'), node=node
3333 error=_(b'censored file data'), node=node
3327 )
3334 )
3328 state[b'skipread'].add(node)
3335 state[b'skipread'].add(node)
3329 except Exception as e:
3336 except Exception as e:
3330 yield revlogproblem(
3337 yield revlogproblem(
3331 error=_(b'unpacking %s: %s')
3338 error=_(b'unpacking %s: %s')
3332 % (short(node), stringutil.forcebytestr(e)),
3339 % (short(node), stringutil.forcebytestr(e)),
3333 node=node,
3340 node=node,
3334 )
3341 )
3335 state[b'skipread'].add(node)
3342 state[b'skipread'].add(node)
3336
3343
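A consumer drives this generator with a small state dict; only the keys read above have to be present (sketch with assumed values):

    state = {
        b'expectedversion': 1,      # revlog format the verifier expects
        b'erroroncensored': False,  # report censored data as an error?
    }
    for problem in rl.verifyintegrity(state):       # hypothetical revlog rl
        msg = problem.error or problem.warning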
3337 def storageinfo(
3344 def storageinfo(
3338 self,
3345 self,
3339 exclusivefiles=False,
3346 exclusivefiles=False,
3340 sharedfiles=False,
3347 sharedfiles=False,
3341 revisionscount=False,
3348 revisionscount=False,
3342 trackedsize=False,
3349 trackedsize=False,
3343 storedsize=False,
3350 storedsize=False,
3344 ):
3351 ):
3345 d = {}
3352 d = {}
3346
3353
3347 if exclusivefiles:
3354 if exclusivefiles:
3348 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3355 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3349 if not self._inline:
3356 if not self._inline:
3350 d[b'exclusivefiles'].append((self.opener, self._datafile))
3357 d[b'exclusivefiles'].append((self.opener, self._datafile))
3351
3358
3352 if sharedfiles:
3359 if sharedfiles:
3353 d[b'sharedfiles'] = []
3360 d[b'sharedfiles'] = []
3354
3361
3355 if revisionscount:
3362 if revisionscount:
3356 d[b'revisionscount'] = len(self)
3363 d[b'revisionscount'] = len(self)
3357
3364
3358 if trackedsize:
3365 if trackedsize:
3359 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3366 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3360
3367
3361 if storedsize:
3368 if storedsize:
3362 d[b'storedsize'] = sum(
3369 d[b'storedsize'] = sum(
3363 self.opener.stat(path).st_size for path in self.files()
3370 self.opener.stat(path).st_size for path in self.files()
3364 )
3371 )
3365
3372
3366 return d
3373 return d
3367
3374
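Every statistic is opt-in, so callers only pay for what they request. For instance, a rough on-disk compression ratio (hypothetical revlog ``rl``):

    info = rl.storageinfo(trackedsize=True, storedsize=True)
    ratio = info[b'storedsize'] / max(1, info[b'trackedsize'])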
3368 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3375 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3369 if not self.hassidedata:
3376 if not self.hassidedata:
3370 return
3377 return
3371 # revlog formats with sidedata support do not support inline
3378 # revlog formats with sidedata support do not support inline
3372 assert not self._inline
3379 assert not self._inline
3373 if not helpers[1] and not helpers[2]:
3380 if not helpers[1] and not helpers[2]:
3374 # Nothing to generate or remove
3381 # Nothing to generate or remove
3375 return
3382 return
3376
3383
3377 new_entries = []
3384 new_entries = []
3378 # append the new sidedata
3385 # append the new sidedata
3379 with self._writing(transaction):
3386 with self._writing(transaction):
3380 ifh, dfh = self._writinghandles
3387 ifh, dfh = self._writinghandles
3381 if self._docket is not None:
3388 if self._docket is not None:
3382 dfh.seek(self._docket.data_end, os.SEEK_SET)
3389 dfh.seek(self._docket.data_end, os.SEEK_SET)
3383 else:
3390 else:
3384 dfh.seek(0, os.SEEK_END)
3391 dfh.seek(0, os.SEEK_END)
3385
3392
3386 current_offset = dfh.tell()
3393 current_offset = dfh.tell()
3387 for rev in range(startrev, endrev + 1):
3394 for rev in range(startrev, endrev + 1):
3388 entry = self.index[rev]
3395 entry = self.index[rev]
3389 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3396 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3390 store=self,
3397 store=self,
3391 sidedata_helpers=helpers,
3398 sidedata_helpers=helpers,
3392 sidedata={},
3399 sidedata={},
3393 rev=rev,
3400 rev=rev,
3394 )
3401 )
3395
3402
3396 serialized_sidedata = sidedatautil.serialize_sidedata(
3403 serialized_sidedata = sidedatautil.serialize_sidedata(
3397 new_sidedata
3404 new_sidedata
3398 )
3405 )
3399
3406
3400 sidedata_compression_mode = COMP_MODE_INLINE
3407 sidedata_compression_mode = COMP_MODE_INLINE
3401 if serialized_sidedata and self.hassidedata:
3408 if serialized_sidedata and self.hassidedata:
3402 sidedata_compression_mode = COMP_MODE_PLAIN
3409 sidedata_compression_mode = COMP_MODE_PLAIN
3403 h, comp_sidedata = self.compress(serialized_sidedata)
3410 h, comp_sidedata = self.compress(serialized_sidedata)
3404 if (
3411 if (
3405 h != b'u'
3412 h != b'u'
3406 and comp_sidedata[0] != b'\0'
3413 and comp_sidedata[0] != b'\0'
3407 and len(comp_sidedata) < len(serialized_sidedata)
3414 and len(comp_sidedata) < len(serialized_sidedata)
3408 ):
3415 ):
3409 assert not h
3416 assert not h
3410 if (
3417 if (
3411 comp_sidedata[0]
3418 comp_sidedata[0]
3412 == self._docket.default_compression_header
3419 == self._docket.default_compression_header
3413 ):
3420 ):
3414 sidedata_compression_mode = COMP_MODE_DEFAULT
3421 sidedata_compression_mode = COMP_MODE_DEFAULT
3415 serialized_sidedata = comp_sidedata
3422 serialized_sidedata = comp_sidedata
3416 else:
3423 else:
3417 sidedata_compression_mode = COMP_MODE_INLINE
3424 sidedata_compression_mode = COMP_MODE_INLINE
3418 serialized_sidedata = comp_sidedata
3425 serialized_sidedata = comp_sidedata
3419 if entry[8] != 0 or entry[9] != 0:
3426 if entry[8] != 0 or entry[9] != 0:
3420 # rewriting entries that already have sidedata is not
3427 # rewriting entries that already have sidedata is not
3421 # supported yet, because it introduces garbage data in the
3428 # supported yet, because it introduces garbage data in the
3422 # revlog.
3429 # revlog.
3423 msg = b"rewriting existing sidedata is not supported yet"
3430 msg = b"rewriting existing sidedata is not supported yet"
3424 raise error.Abort(msg)
3431 raise error.Abort(msg)
3425
3432
3426 # Apply (potential) flags to add and to remove after running
3433 # Apply (potential) flags to add and to remove after running
3427 # the sidedata helpers
3434 # the sidedata helpers
3428 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3435 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3429 entry_update = (
3436 entry_update = (
3430 current_offset,
3437 current_offset,
3431 len(serialized_sidedata),
3438 len(serialized_sidedata),
3432 new_offset_flags,
3439 new_offset_flags,
3433 sidedata_compression_mode,
3440 sidedata_compression_mode,
3434 )
3441 )
3435
3442
3436 # the sidedata computation might have moved the file cursors around
3443 # the sidedata computation might have moved the file cursors around
3437 dfh.seek(current_offset, os.SEEK_SET)
3444 dfh.seek(current_offset, os.SEEK_SET)
3438 dfh.write(serialized_sidedata)
3445 dfh.write(serialized_sidedata)
3439 new_entries.append(entry_update)
3446 new_entries.append(entry_update)
3440 current_offset += len(serialized_sidedata)
3447 current_offset += len(serialized_sidedata)
3441 if self._docket is not None:
3448 if self._docket is not None:
3442 self._docket.data_end = dfh.tell()
3449 self._docket.data_end = dfh.tell()
3443
3450
3444 # rewrite the new index entries
3451 # rewrite the new index entries
3445 ifh.seek(startrev * self.index.entry_size)
3452 ifh.seek(startrev * self.index.entry_size)
3446 for i, e in enumerate(new_entries):
3453 for i, e in enumerate(new_entries):
3447 rev = startrev + i
3454 rev = startrev + i
3448 self.index.replace_sidedata_info(rev, *e)
3455 self.index.replace_sidedata_info(rev, *e)
3449 packed = self.index.entry_binary(rev)
3456 packed = self.index.entry_binary(rev)
3450 if rev == 0 and self._docket is None:
3457 if rev == 0 and self._docket is None:
3451 header = self._format_flags | self._format_version
3458 header = self._format_flags | self._format_version
3452 header = self.index.pack_header(header)
3459 header = self.index.pack_header(header)
3453 packed = header + packed
3460 packed = header + packed
3454 ifh.write(packed)
3461 ifh.write(packed)
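The compression-mode selection above can be restated as a short rule: keep sidedata uncompressed (PLAIN) unless compression actually shrinks it, and use the headerless DEFAULT mode only when the compressed chunk starts with the docket's default compression header. A sketch under those assumptions (byte slicing keeps it Python 3 safe):

    h, comp = rl.compress(blob)                    # hypothetical names
    if h != b'u' and comp[:1] != b'\0' and len(comp) < len(blob):
        if comp[:1] == docket.default_compression_header:
            mode = COMP_MODE_DEFAULT               # engine implied by the docket
        else:
            mode = COMP_MODE_INLINE                # engine header kept with chunk
        blob = comp
    else:
        mode = COMP_MODE_PLAIN                     # stored uncompressed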
@@ -1,29 +1,86 b''
1 Test correctness of revlog inline -> non-inline transition
1 Test correctness of revlog inline -> non-inline transition
2 ----------------------------------------------------------
2 ----------------------------------------------------------
3
3
4 Helper extension to intercept renames.
5
6 $ cat > $TESTTMP/intercept_rename.py << EOF
7 > import os
8 > import sys
9 > from mercurial import extensions, util
10 >
11 > def extsetup(ui):
12 > def close(orig, *args, **kwargs):
13 > path = args[0]._atomictempfile__name
14 > if path.endswith(b'/.hg/store/data/file.i'):
15 > os._exit(80)
16 > return orig(*args, **kwargs)
17 > extensions.wrapfunction(util.atomictempfile, 'close', close)
18 > EOF
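The wrapper above hooks ``util.atomictempfile.close`` and kills the process with status 80 just before the freshly written ``data/file.i`` would be renamed into place, simulating a crash between writing the split-off data file and publishing the new index.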
19
20
4 Test offset computation to correctly factor in the index entries themselves.
21 Test offset computation to correctly factor in the index entries themselves.
22 Also test that the journal records the correct data file size if the transaction
23 is aborted after the index has been replaced.
24
5 Test repo has one small, one moderate and one big change. The clone has
25 Test repo has one small, one moderate and one big change. The clone has
6 the small and moderate change and will transition to non-inline storage when
26 the small and moderate change and will transition to non-inline storage when
7 adding the big change.
27 adding the big change.
8
28
9 $ hg init troffset-computation --config format.revlog-compression=none
29 $ hg init troffset-computation --config format.revlog-compression=none
10 $ cd troffset-computation
30 $ cd troffset-computation
11 $ printf '% 20d' '1' > file
31 $ printf '% 20d' '1' > file
12 $ hg commit -Aqm_
32 $ hg commit -Aqm_
13 $ printf '% 1024d' '1' > file
33 $ printf '% 1024d' '1' > file
14 $ hg commit -Aqm_
34 $ hg commit -Aqm_
15 $ dd if=/dev/zero of=file bs=1k count=128 > /dev/null 2>&1
35 $ dd if=/dev/zero of=file bs=1k count=128 > /dev/null 2>&1
16 $ hg commit -Aqm_
36 $ hg commit -Aqm_
17 $ cd ..
37 $ cd ..
18
38
19 $ hg clone -r 1 troffset-computation troffset-computation-copy --config format.revlog-compression=none -q
39 $ hg clone -r 1 troffset-computation troffset-computation-copy --config format.revlog-compression=none -q
20 $ cd troffset-computation-copy
40 $ cd troffset-computation-copy
41
42 Reference size:
43
44 $ f -s .hg/store/data/file*
45 .hg/store/data/file.i: size=1174
46
21 $ cat > .hg/hgrc <<EOF
47 $ cat > .hg/hgrc <<EOF
22 > [hooks]
48 > [hooks]
23 > pretxnchangegroup = python:$TESTDIR/helper-killhook.py:killme
49 > pretxnchangegroup = python:$TESTDIR/helper-killhook.py:killme
24 > EOF
50 > EOF
25 $ hg pull ../troffset-computation
51 $ hg pull ../troffset-computation
26 pulling from ../troffset-computation
52 pulling from ../troffset-computation
27 [80]
53 [80]
28 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file | tail -1
54
55 The first file.i entry should match the size above.
56 The first file.d entry is the temporary record during the split,
57 the second entry after the split happened. The sum of the second file.d
58 and the second file.i entry should match the first file.i entry.
59
60 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file
61 data/file.i 1174
62 data/file.d 0
63 data/file.d 1046
29 data/file.i 128
64 data/file.i 128
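As a sanity check on the numbers: 1046 (final file.d) + 128 (final file.i) = 1174 bytes, exactly the inline file.i size recorded first.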
65 $ cd ..
66
67 Now retry the same but intercept the rename of the index and check that
68 the journal does not contain the new index size. This demonstrates the edge case
69 where the data file is left as garbage.
70
71 $ hg clone -r 1 troffset-computation troffset-computation-copy2 --config format.revlog-compression=none -q
72 $ cd troffset-computation-copy2
73 $ cat > .hg/hgrc <<EOF
74 > [extensions]
75 > intercept_rename = $TESTTMP/intercept_rename.py
76 > [hooks]
77 > pretxnchangegroup = python:$TESTDIR/helper-killhook.py:killme
78 > EOF
79 $ hg pull ../troffset-computation
80 pulling from ../troffset-computation
81 [80]
82 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file
83 data/file.i 1174
84 data/file.d 0
85 data/file.d 1046
86 $ cd ..