##// END OF EJS Templates
revlog: implement sidedata without using _revisiondata...
marmoute -
r48174:d6a52783 default
parent child Browse files
Show More
@@ -1,3472 +1,3477 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import contextlib
18 import contextlib
19 import errno
19 import errno
20 import io
20 import io
21 import os
21 import os
22 import struct
22 import struct
23 import zlib
23 import zlib
24
24
25 # import stuff from node for others to import from revlog
25 # import stuff from node for others to import from revlog
26 from .node import (
26 from .node import (
27 bin,
27 bin,
28 hex,
28 hex,
29 nullrev,
29 nullrev,
30 sha1nodeconstants,
30 sha1nodeconstants,
31 short,
31 short,
32 wdirrev,
32 wdirrev,
33 )
33 )
34 from .i18n import _
34 from .i18n import _
35 from .pycompat import getattr
35 from .pycompat import getattr
36 from .revlogutils.constants import (
36 from .revlogutils.constants import (
37 ALL_KINDS,
37 ALL_KINDS,
38 CHANGELOGV2,
38 CHANGELOGV2,
39 COMP_MODE_DEFAULT,
39 COMP_MODE_DEFAULT,
40 COMP_MODE_INLINE,
40 COMP_MODE_INLINE,
41 COMP_MODE_PLAIN,
41 COMP_MODE_PLAIN,
42 FEATURES_BY_VERSION,
42 FEATURES_BY_VERSION,
43 FLAG_GENERALDELTA,
43 FLAG_GENERALDELTA,
44 FLAG_INLINE_DATA,
44 FLAG_INLINE_DATA,
45 INDEX_HEADER,
45 INDEX_HEADER,
46 KIND_CHANGELOG,
46 KIND_CHANGELOG,
47 REVLOGV0,
47 REVLOGV0,
48 REVLOGV1,
48 REVLOGV1,
49 REVLOGV1_FLAGS,
49 REVLOGV1_FLAGS,
50 REVLOGV2,
50 REVLOGV2,
51 REVLOGV2_FLAGS,
51 REVLOGV2_FLAGS,
52 REVLOG_DEFAULT_FLAGS,
52 REVLOG_DEFAULT_FLAGS,
53 REVLOG_DEFAULT_FORMAT,
53 REVLOG_DEFAULT_FORMAT,
54 REVLOG_DEFAULT_VERSION,
54 REVLOG_DEFAULT_VERSION,
55 SUPPORTED_FLAGS,
55 SUPPORTED_FLAGS,
56 )
56 )
57 from .revlogutils.flagutil import (
57 from .revlogutils.flagutil import (
58 REVIDX_DEFAULT_FLAGS,
58 REVIDX_DEFAULT_FLAGS,
59 REVIDX_ELLIPSIS,
59 REVIDX_ELLIPSIS,
60 REVIDX_EXTSTORED,
60 REVIDX_EXTSTORED,
61 REVIDX_FLAGS_ORDER,
61 REVIDX_FLAGS_ORDER,
62 REVIDX_HASCOPIESINFO,
62 REVIDX_HASCOPIESINFO,
63 REVIDX_ISCENSORED,
63 REVIDX_ISCENSORED,
64 REVIDX_RAWTEXT_CHANGING_FLAGS,
64 REVIDX_RAWTEXT_CHANGING_FLAGS,
65 )
65 )
66 from .thirdparty import attr
66 from .thirdparty import attr
67 from . import (
67 from . import (
68 ancestor,
68 ancestor,
69 dagop,
69 dagop,
70 error,
70 error,
71 mdiff,
71 mdiff,
72 policy,
72 policy,
73 pycompat,
73 pycompat,
74 templatefilters,
74 templatefilters,
75 util,
75 util,
76 )
76 )
77 from .interfaces import (
77 from .interfaces import (
78 repository,
78 repository,
79 util as interfaceutil,
79 util as interfaceutil,
80 )
80 )
81 from .revlogutils import (
81 from .revlogutils import (
82 deltas as deltautil,
82 deltas as deltautil,
83 docket as docketutil,
83 docket as docketutil,
84 flagutil,
84 flagutil,
85 nodemap as nodemaputil,
85 nodemap as nodemaputil,
86 revlogv0,
86 revlogv0,
87 sidedata as sidedatautil,
87 sidedata as sidedatautil,
88 )
88 )
89 from .utils import (
89 from .utils import (
90 storageutil,
90 storageutil,
91 stringutil,
91 stringutil,
92 )
92 )
93
93
94 # blanket usage of all the names to prevent pyflakes complaints
94 # blanket usage of all the names to prevent pyflakes complaints
95 # We need these names available in the module for extensions.
95 # We need these names available in the module for extensions.
96
96
97 REVLOGV0
97 REVLOGV0
98 REVLOGV1
98 REVLOGV1
99 REVLOGV2
99 REVLOGV2
100 FLAG_INLINE_DATA
100 FLAG_INLINE_DATA
101 FLAG_GENERALDELTA
101 FLAG_GENERALDELTA
102 REVLOG_DEFAULT_FLAGS
102 REVLOG_DEFAULT_FLAGS
103 REVLOG_DEFAULT_FORMAT
103 REVLOG_DEFAULT_FORMAT
104 REVLOG_DEFAULT_VERSION
104 REVLOG_DEFAULT_VERSION
105 REVLOGV1_FLAGS
105 REVLOGV1_FLAGS
106 REVLOGV2_FLAGS
106 REVLOGV2_FLAGS
107 REVIDX_ISCENSORED
107 REVIDX_ISCENSORED
108 REVIDX_ELLIPSIS
108 REVIDX_ELLIPSIS
109 REVIDX_HASCOPIESINFO
109 REVIDX_HASCOPIESINFO
110 REVIDX_EXTSTORED
110 REVIDX_EXTSTORED
111 REVIDX_DEFAULT_FLAGS
111 REVIDX_DEFAULT_FLAGS
112 REVIDX_FLAGS_ORDER
112 REVIDX_FLAGS_ORDER
113 REVIDX_RAWTEXT_CHANGING_FLAGS
113 REVIDX_RAWTEXT_CHANGING_FLAGS
114
114
115 parsers = policy.importmod('parsers')
115 parsers = policy.importmod('parsers')
116 rustancestor = policy.importrust('ancestor')
116 rustancestor = policy.importrust('ancestor')
117 rustdagop = policy.importrust('dagop')
117 rustdagop = policy.importrust('dagop')
118 rustrevlog = policy.importrust('revlog')
118 rustrevlog = policy.importrust('revlog')
119
119
# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data (131072 bytes == 128 KiB)
_maxinline = 131072
# 1048576 bytes == 1 MiB.
# NOTE(review): the consumer of this value is not visible in this part of
# the file -- confirm its purpose before changing it.
_chunksize = 1048576
126
126
# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    """Read-side flag processor for ``REVIDX_ELLIPSIS``.

    ``rl`` is accepted to match the processor signature and is not used.
    Returns the 2-tuple ``(text, False)`` with ``text`` untouched.
    """
    return text, False
130
130
131
131
def ellipsiswriteprocessor(rl, text):
    """Write-side flag processor for ``REVIDX_ELLIPSIS``.

    ``rl`` is accepted to match the processor signature and is not used.
    Returns the 2-tuple ``(text, False)`` with ``text`` untouched.
    """
    return text, False
134
134
135
135
def ellipsisrawprocessor(rl, text):
    """Raw flag processor for ``REVIDX_ELLIPSIS``.

    Both arguments are ignored; the function always returns ``False``.
    """
    return False
138
138
139
139
# (read, write, raw) processors registered for REVIDX_ELLIPSIS; the order of
# the three entries is significant.
ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)
145
145
146
146
def offset_type(offset, type):
    """Pack a data offset and revision flags into a single integer.

    ``offset`` is coerced with ``int()`` and shifted left by 16 bits;
    ``type`` (the flag bits) is OR-ed into the result.

    Raises ``ValueError`` when ``type`` has any bit set that is not part of
    ``flagutil.REVIDX_KNOWN_FLAGS``.
    """
    unknown_bits = type & ~flagutil.REVIDX_KNOWN_FLAGS
    if unknown_bits != 0:
        raise ValueError(b'unknown revlog index flags')
    # ``<<`` binds tighter than ``|``: the shift happens before the OR.
    return int(int(offset) << 16 | type)
151
151
152
152
def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation.

    When ``skipflags`` is truthy the node is only recorded in
    ``state[b'skipread']`` and nothing is read; otherwise ``rl.revision(node)``
    is called and its return value is discarded.
    """
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)
161
161
162
162
# True if a fast implementation for persistent-nodemap is available
#
# We also consider that we have a "fast" implementation in "pure" python,
# because people using pure don't really have performance considerations (and
# have a wheelbarrow of other sources of slowness)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)
171
171
172
172
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext

    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    textlen:    NOTE(review): presumably the length of the fulltext; not
                established by this class -- confirm against callers
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    # Field order is part of the constructor signature; do not reorder.
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()
192
192
193
193
@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    """Plain record declared as implementing ``repository.irevisiondelta``.

    Every field except ``linknode`` is required; ``linknode`` defaults to
    ``None``. See the interface definition for the meaning of each field.
    """

    # Field order is part of the constructor signature; do not reorder.
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)
208
208
209
209
@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    """Immutable record declared as implementing ``repository.iverifyproblem``.

    All three fields are optional and default to ``None``.
    """

    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)
216
216
217
217
def parse_index_v1(data, inline):
    """Parse index ``data`` and return an ``(index, cache)`` pair.

    Delegates to ``parse_index2`` of the module-level ``parsers`` module.
    """
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache
222
222
223
223
def parse_index_v2(data, inline):
    """Parse index ``data`` and return an ``(index, cache)`` pair.

    Same as ``parse_index_v1`` except that ``parse_index2`` is called with
    ``revlogv2=True``.
    """
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
    return index, cache
228
228
229
229
def parse_index_cl_v2(data, inline):
    """Parse changelog-v2 index ``data`` and return an ``(index, cache)`` pair.

    ``inline`` must be falsy; this is enforced with an assertion.
    """
    assert not inline
    # This format is parsed by the implementation in ``.pure.parsers``,
    # imported lazily here, rather than through the module-level ``parsers``.
    from .pure.parsers import parse_index_cl_v2 as _pure_parse_index_cl_v2

    index, cache = _pure_parse_index_cl_v2(data)
    return index, cache
237
237
238
238
# ``parse_index_v1_nodemap`` is only a real parser when the ``parsers`` module
# exposes ``parse_index_devel_nodemap``; otherwise the name is bound to None.
if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        """Parse index ``data`` with ``parsers.parse_index_devel_nodemap``."""
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None
248
248
249
249
def parse_index_v1_mixed(data, inline):
    """Parse with ``parse_index_v1`` and wrap the index for the Rust code.

    Returns ``(rustrevlog.MixedIndex(index), cache)``.
    """
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache
253
253
254
254
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF
258
258
259
259
260 class revlog(object):
260 class revlog(object):
261 """
261 """
262 the underlying revision storage object
262 the underlying revision storage object
263
263
264 A revlog consists of two parts, an index and the revision data.
264 A revlog consists of two parts, an index and the revision data.
265
265
266 The index is a file with a fixed record size containing
266 The index is a file with a fixed record size containing
267 information on each revision, including its nodeid (hash), the
267 information on each revision, including its nodeid (hash), the
268 nodeids of its parents, the position and offset of its data within
268 nodeids of its parents, the position and offset of its data within
269 the data file, and the revision it's based on. Finally, each entry
269 the data file, and the revision it's based on. Finally, each entry
270 contains a linkrev entry that can serve as a pointer to external
270 contains a linkrev entry that can serve as a pointer to external
271 data.
271 data.
272
272
273 The revision data itself is a linear collection of data chunks.
273 The revision data itself is a linear collection of data chunks.
274 Each chunk represents a revision and is usually represented as a
274 Each chunk represents a revision and is usually represented as a
275 delta against the previous chunk. To bound lookup time, runs of
275 delta against the previous chunk. To bound lookup time, runs of
276 deltas are limited to about 2 times the length of the original
276 deltas are limited to about 2 times the length of the original
277 version data. This makes retrieval of a version proportional to
277 version data. This makes retrieval of a version proportional to
278 its size, or O(1) relative to the number of revisions.
278 its size, or O(1) relative to the number of revisions.
279
279
280 Both pieces of the revlog are written to in an append-only
280 Both pieces of the revlog are written to in an append-only
281 fashion, which means we never need to rewrite a file to insert or
281 fashion, which means we never need to rewrite a file to insert or
282 remove data, and can use some simple techniques to avoid the need
282 remove data, and can use some simple techniques to avoid the need
283 for locking while reading.
283 for locking while reading.
284
284
285 If checkambig, indexfile is opened with checkambig=True at
285 If checkambig, indexfile is opened with checkambig=True at
286 writing, to avoid file stat ambiguity.
286 writing, to avoid file stat ambiguity.
287
287
288 If mmaplargeindex is True, and an mmapindexthreshold is set, the
288 If mmaplargeindex is True, and an mmapindexthreshold is set, the
289 index will be mmapped rather than read if it is larger than the
289 index will be mmapped rather than read if it is larger than the
290 configured threshold.
290 configured threshold.
291
291
292 If censorable is True, the revlog can have censored revisions.
292 If censorable is True, the revlog can have censored revisions.
293
293
294 If `upperboundcomp` is not None, this is the expected maximal gain from
294 If `upperboundcomp` is not None, this is the expected maximal gain from
295 compression for the data content.
295 compression for the data content.
296
296
297 `concurrencychecker` is an optional function that receives 3 arguments: a
297 `concurrencychecker` is an optional function that receives 3 arguments: a
298 file handle, a filename, and an expected position. It should check whether
298 file handle, a filename, and an expected position. It should check whether
299 the current position in the file handle is valid, and log/warn/fail (by
299 the current position in the file handle is valid, and log/warn/fail (by
300 raising).
300 raising).
301
301
302
302
303 Internal details
303 Internal details
304 ----------------
304 ----------------
305
305
306 A large part of the revlog logic deals with revisions' "index entries", tuple
306 A large part of the revlog logic deals with revisions' "index entries", tuple
307 objects that contain the same "items" whatever the revlog version.
307 objects that contain the same "items" whatever the revlog version.
308 Different versions will have different ways of storing these items (sometimes
308 Different versions will have different ways of storing these items (sometimes
309 not having them at all), but the tuple will always be the same. New fields
309 not having them at all), but the tuple will always be the same. New fields
310 are usually added at the end to avoid breaking existing code that relies
310 are usually added at the end to avoid breaking existing code that relies
311 on the existing order. The fields are defined as follows:
311 on the existing order. The fields are defined as follows:
312
312
313 [0] offset:
313 [0] offset:
314 The byte index of the start of revision data chunk.
314 The byte index of the start of revision data chunk.
315 That value is shifted up by 16 bits. use "offset = field >> 16" to
315 That value is shifted up by 16 bits. use "offset = field >> 16" to
316 retrieve it.
316 retrieve it.
317
317
318 flags:
318 flags:
319 A flag field that carries special information or changes the behavior
319 A flag field that carries special information or changes the behavior
320 of the revision. (see `REVIDX_*` constants for details)
320 of the revision. (see `REVIDX_*` constants for details)
321 The flag field only occupies the first 16 bits of this field,
321 The flag field only occupies the first 16 bits of this field,
322 use "flags = field & 0xFFFF" to retrieve the value.
322 use "flags = field & 0xFFFF" to retrieve the value.
323
323
324 [1] compressed length:
324 [1] compressed length:
325 The size, in bytes, of the chunk on disk
325 The size, in bytes, of the chunk on disk
326
326
327 [2] uncompressed length:
327 [2] uncompressed length:
328 The size, in bytes, of the full revision once reconstructed.
328 The size, in bytes, of the full revision once reconstructed.
329
329
330 [3] base rev:
330 [3] base rev:
331 Either the base of the revision delta chain (without general
331 Either the base of the revision delta chain (without general
332 delta), or the base of the delta (stored in the data chunk)
332 delta), or the base of the delta (stored in the data chunk)
333 with general delta.
333 with general delta.
334
334
335 [4] link rev:
335 [4] link rev:
336 Changelog revision number of the changeset introducing this
336 Changelog revision number of the changeset introducing this
337 revision.
337 revision.
338
338
339 [5] parent 1 rev:
339 [5] parent 1 rev:
340 Revision number of the first parent
340 Revision number of the first parent
341
341
342 [6] parent 2 rev:
342 [6] parent 2 rev:
343 Revision number of the second parent
343 Revision number of the second parent
344
344
345 [7] node id:
345 [7] node id:
346 The node id of the current revision
346 The node id of the current revision
347
347
348 [8] sidedata offset:
348 [8] sidedata offset:
349 The byte index of the start of the revision's side-data chunk.
349 The byte index of the start of the revision's side-data chunk.
350
350
351 [9] sidedata chunk length:
351 [9] sidedata chunk length:
352 The size, in bytes, of the revision's side-data chunk.
352 The size, in bytes, of the revision's side-data chunk.
353
353
354 [10] data compression mode:
354 [10] data compression mode:
355 two bits that detail the way the data chunk is compressed on disk.
355 two bits that detail the way the data chunk is compressed on disk.
356 (see "COMP_MODE_*" constants for details). For revlog version 0 and
356 (see "COMP_MODE_*" constants for details). For revlog version 0 and
357 1 this will always be COMP_MODE_INLINE.
357 1 this will always be COMP_MODE_INLINE.
358
358
359 [11] side-data compression mode:
359 [11] side-data compression mode:
360 two bits that detail the way the sidedata chunk is compressed on disk.
360 two bits that detail the way the sidedata chunk is compressed on disk.
361 (see "COMP_MODE_*" constants for details)
361 (see "COMP_MODE_*" constants for details)
362 """
362 """
363
363
364 _flagserrorclass = error.RevlogError
364 _flagserrorclass = error.RevlogError
365
365
def __init__(
    self,
    opener,
    target,
    radix,
    postfix=None,  # only exists for `tmpcensored` now
    checkambig=False,
    mmaplargeindex=False,
    censorable=False,
    upperboundcomp=None,
    persistentnodemap=False,
    concurrencychecker=None,
    trypending=False,
):
    """
    create a revlog object

    opener is a function that abstracts the file opening operation
    and can be used to implement COW semantics or the like.

    `target`: a (KIND, ID) tuple that identifies the content stored in
    this revlog. It helps the rest of the code to understand what the revlog
    is about without having to resort to heuristics and index filename
    analysis. Note that this must reliably be set by normal code, but
    that test, debug, or performance measurement code might not set this to
    an accurate value.

    `persistentnodemap`: when true, the nodemap file is looked up through
    `nodemaputil.get_nodemap_file()`; otherwise `_nodemap_file` stays None.

    The remaining arguments are stored on the instance as-is. The
    constructor finishes by calling `self._loadindex()`.
    """
    self.upperboundcomp = upperboundcomp

    self.radix = radix

    self._docket_file = None
    self._indexfile = None
    self._datafile = None
    self._nodemap_file = None
    self.postfix = postfix
    self._trypending = trypending
    self.opener = opener
    if persistentnodemap:
        # The helper receives the partially initialised instance, so this
        # call must stay after the assignments above.
        self._nodemap_file = nodemaputil.get_nodemap_file(self)

    assert target[0] in ALL_KINDS
    assert len(target) == 2
    self.target = target
    # When True, indexfile is opened with checkambig=True at writing, to
    # avoid file stat ambiguity.
    self._checkambig = checkambig
    self._mmaplargeindex = mmaplargeindex
    self._censorable = censorable
    # 3-tuple of (node, rev, text) for a raw revision.
    self._revisioncache = None
    # Maps rev to chain base rev.
    self._chainbasecache = util.lrucachedict(100)
    # 2-tuple of (offset, data) of raw data from the revlog at an offset.
    self._chunkcache = (0, b'')
    # How much data to read and cache into the raw revlog data cache.
    self._chunkcachesize = 65536
    self._maxchainlen = None
    self._deltabothparents = True
    self.index = None
    self._docket = None
    self._nodemap_docket = None
    # Mapping of partial identifiers to full nodes.
    self._pcache = {}
    # Defaults for the tunables below; `_init_opts()` may override them from
    # the opener options.
    self._compengine = b'zlib'
    self._compengineopts = {}
    self._maxdeltachainspan = -1
    self._withsparseread = False
    self._sparserevlog = False
    self.hassidedata = False
    self._srdensitythreshold = 0.50
    self._srmingapsize = 262144

    # Make copy of flag processors so each revlog instance can support
    # custom flags.
    self._flagprocessors = dict(flagutil.flagprocessors)

    # 2-tuple of file handles being used for active writing.
    self._writinghandles = None
    # prevent nesting of addgroup
    self._adding_group = None

    self._loadindex()

    # NOTE: assigned only after the index has been loaded, as in the
    # original statement order.
    self._concurrencychecker = concurrencychecker
452
452
def _init_opts(self):
    """process options (from above/config) to setup associated default revlog mode

    These values might be affected when actually reading on disk information.

    The relevant values are returned, as a 3-tuple, for use in _loadindex().

    * new_header:
        version header to use if we need to create a new revlog

    * mmapindexthreshold:
        minimal index size for start to use mmap (None unless
        `self._mmaplargeindex` is set and the option is present)

    * force_nodemap:
        force the usage of a "development" version of the nodemap code

    Raises error.RevlogError when the resulting chunk cache size is not a
    strictly positive power of two.
    """
    mmapindexthreshold = None
    opts = self.opener.options

    # Pick the header for newly created revlogs; the first match wins.
    if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
        new_header = CHANGELOGV2
    elif b'revlogv2' in opts:
        new_header = REVLOGV2
    elif b'revlogv1' in opts:
        new_header = REVLOGV1 | FLAG_INLINE_DATA
        if b'generaldelta' in opts:
            new_header |= FLAG_GENERALDELTA
    elif b'revlogv0' in opts:
        new_header = REVLOGV0
    else:
        new_header = REVLOG_DEFAULT_VERSION

    if b'chunkcachesize' in opts:
        self._chunkcachesize = opts[b'chunkcachesize']
    if b'maxchainlen' in opts:
        self._maxchainlen = opts[b'maxchainlen']
    if b'deltabothparents' in opts:
        self._deltabothparents = opts[b'deltabothparents']
    self._lazydelta = bool(opts.get(b'lazydelta', True))
    # lazydeltabase is only honoured when lazydelta itself is enabled
    self._lazydeltabase = False
    if self._lazydelta:
        self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
    if b'compengine' in opts:
        self._compengine = opts[b'compengine']
    if b'zlib.level' in opts:
        self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
    if b'zstd.level' in opts:
        self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
    if b'maxdeltachainspan' in opts:
        self._maxdeltachainspan = opts[b'maxdeltachainspan']
    if self._mmaplargeindex and b'mmapindexthreshold' in opts:
        mmapindexthreshold = opts[b'mmapindexthreshold']
    self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
    withsparseread = bool(opts.get(b'with-sparse-read', False))
    # sparse-revlog forces sparse-read
    self._withsparseread = self._sparserevlog or withsparseread
    if b'sparse-read-density-threshold' in opts:
        self._srdensitythreshold = opts[b'sparse-read-density-threshold']
    if b'sparse-read-min-gap-size' in opts:
        self._srmingapsize = opts[b'sparse-read-min-gap-size']
    if opts.get(b'enableellipsis'):
        self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

    # revlog v0 doesn't have flag processors
    for flag, processor in pycompat.iteritems(
        opts.get(b'flagprocessors', {})
    ):
        flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

    if self._chunkcachesize <= 0:
        raise error.RevlogError(
            _(b'revlog chunk cache size %r is not greater than 0')
            % self._chunkcachesize
        )
    elif self._chunkcachesize & (self._chunkcachesize - 1):
        # n & (n - 1) is non-zero exactly when more than one bit is set
        raise error.RevlogError(
            _(b'revlog chunk cache size %r is not a power of 2')
            % self._chunkcachesize
        )
    force_nodemap = opts.get(b'devel-force-nodemap', False)
    return new_header, mmapindexthreshold, force_nodemap
534
534
535 def _get_data(self, filepath, mmap_threshold, size=None):
535 def _get_data(self, filepath, mmap_threshold, size=None):
536 """return a file content with or without mmap
536 """return a file content with or without mmap
537
537
538 If the file is missing return the empty string"""
538 If the file is missing return the empty string"""
539 try:
539 try:
540 with self.opener(filepath) as fp:
540 with self.opener(filepath) as fp:
541 if mmap_threshold is not None:
541 if mmap_threshold is not None:
542 file_size = self.opener.fstat(fp).st_size
542 file_size = self.opener.fstat(fp).st_size
543 if file_size >= mmap_threshold:
543 if file_size >= mmap_threshold:
544 if size is not None:
544 if size is not None:
545 # avoid potentiel mmap crash
545 # avoid potentiel mmap crash
546 size = min(file_size, size)
546 size = min(file_size, size)
547 # TODO: should .close() to release resources without
547 # TODO: should .close() to release resources without
548 # relying on Python GC
548 # relying on Python GC
549 if size is None:
549 if size is None:
550 return util.buffer(util.mmapread(fp))
550 return util.buffer(util.mmapread(fp))
551 else:
551 else:
552 return util.buffer(util.mmapread(fp, size))
552 return util.buffer(util.mmapread(fp, size))
553 if size is None:
553 if size is None:
554 return fp.read()
554 return fp.read()
555 else:
555 else:
556 return fp.read(size)
556 return fp.read(size)
557 except IOError as inst:
557 except IOError as inst:
558 if inst.errno != errno.ENOENT:
558 if inst.errno != errno.ENOENT:
559 raise
559 raise
560 return b''
560 return b''
561
561
def _loadindex(self):
    """Read the revlog entry point and set up all index related state.

    The header found in the entry point (or the default one computed by
    ``_init_opts`` when there is no data yet) selects the format version
    and flags, which in turn select the file layout (plain index or
    docket), the index parser and the optional persistent nodemap.

    Raises ``error.RevlogError`` for unknown versions or flags, for a
    truncated docket-based index and for an index that fails to parse.
    """
    new_header, mmapindexthreshold, force_nodemap = self._init_opts()

    # An explicit postfix wins, then a pending ``.i.a`` file (when we are
    # allowed to look at pending data and it exists), then plain ``.i``.
    radix = self.radix
    if self.postfix is not None:
        entry_point = b'%s.i.%s' % (radix, self.postfix)
    elif self._trypending and self.opener.exists(b'%s.i.a' % radix):
        entry_point = b'%s.i.a' % radix
    else:
        entry_point = b'%s.i' % radix

    self._initempty = True
    entry_data = self._get_data(entry_point, mmapindexthreshold)
    if len(entry_data) == 0:
        header = new_header
    else:
        header = INDEX_HEADER.unpack(entry_data[:4])[0]
        self._initempty = False

    # low 16 bits: format version, the remaining bits: format flags
    format_flags = header & ~0xFFFF
    version = header & 0xFFFF
    self._format_flags = format_flags
    self._format_version = version

    supported_flags = SUPPORTED_FLAGS.get(version)
    if supported_flags is None:
        msg = _(b'unknown version (%d) in revlog %s')
        msg %= (version, self.display_id)
        raise error.RevlogError(msg)
    if format_flags & ~supported_flags:
        msg = _(b'unknown flags (%#04x) in version %d revlog %s')
        msg %= (format_flags >> 16, version, self.display_id)
        raise error.RevlogError(msg)

    features = FEATURES_BY_VERSION[version]
    self._inline = features[b'inline'](format_flags)
    self._generaldelta = features[b'generaldelta'](format_flags)
    self.hassidedata = features[b'sidedata']

    if features[b'docket']:
        # the entry point is a docket pointing to the actual index file
        self._docket_file = entry_point
        if self._initempty:
            self._docket = docketutil.default_docket(self, header)
        else:
            self._docket = docketutil.parse_docket(
                self, entry_data, use_pending=self._trypending
            )
        docket = self._docket
        self._indexfile = docket.index_filepath()
        index_size = docket.index_end
        index_data = b''
        if index_size > 0:
            index_data = self._get_data(
                self._indexfile, mmapindexthreshold, size=index_size
            )
            if len(index_data) < index_size:
                msg = _(b'too few index data for %s: got %d, expected %d')
                msg %= (self.display_id, len(index_data), index_size)
                raise error.RevlogError(msg)

        self._inline = False
        # generaldelta implied by version 2 revlogs.
        self._generaldelta = True
        # the logic for persistent nodemap will be dealt with within the
        # main docket, so disable it for now.
        self._nodemap_file = None
    else:
        # the entry point is the index itself
        self._indexfile = entry_point
        index_data = entry_data

    if self._docket is not None:
        datafile = self._docket.data_filepath()
    elif self.postfix is None:
        datafile = b'%s.d' % self.radix
    else:
        datafile = b'%s.d.%s' % (self.radix, self.postfix)
    self._datafile = datafile

    self.nodeconstants = sha1nodeconstants
    self.nullid = self.nodeconstants.nullid

    # sparse-revlog can't be on without general-delta (issue6056)
    if not self._generaldelta:
        self._sparserevlog = False

    self._storedeltachains = True

    devel_nodemap = (
        self._nodemap_file
        and force_nodemap
        and parse_index_v1_nodemap is not None
    )

    use_rust_index = False
    if rustrevlog is not None:
        if self._nodemap_file is None:
            use_rust_index = self.opener.options.get(b'rust.index')
        else:
            use_rust_index = True

    # The format version has priority over the nodemap/rust variants of
    # the v1 parser.
    if version == REVLOGV0:
        parser = revlogv0.parse_index_v0
    elif version == REVLOGV2:
        parser = parse_index_v2
    elif version == CHANGELOGV2:
        parser = parse_index_cl_v2
    elif devel_nodemap:
        parser = parse_index_v1_nodemap
    elif use_rust_index:
        parser = parse_index_v1_mixed
    else:
        parser = parse_index_v1
    self._parse_index = parser

    try:
        parsed = self._parse_index(index_data, self._inline)
        index, _unused_chunkcache = parsed
        if (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(index, 'update_nodemap_data')
        ):
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                nm_docket = nodemap_data[0]
                tip_rev = nm_docket.tip_rev
                if (
                    len(index) > tip_rev
                    and index[tip_rev][7] == nm_docket.tip_node
                ):
                    # no changelog tampering
                    self._nodemap_docket = nm_docket
                    index.update_nodemap_data(*nodemap_data)
    except (ValueError, IndexError):
        raise error.RevlogError(
            _(b"index %s is corrupted") % self.display_id
        )
    self.index, self._chunkcache = parsed
    if not self._chunkcache:
        self._chunkclear()
    # revnum -> (chain-length, sum-delta-length)
    self._chaininfocache = util.lrucachedict(500)
    # revlog header -> revlog compressor
    self._decompressors = {}
701
701
@util.propertycache
def revlog_kind(self):
    """The first item of ``self.target``."""
    kind = self.target[0]
    return kind
705
705
@util.propertycache
def display_id(self):
    """Identifier of this revlog suitable for user facing messages.

    This is currently ``self.radix``; a dedicated user facing
    representation of ``revlog.target`` might be a better fit.
    """
    public_id = self.radix
    return public_id
712
712
713 def _get_decompressor(self, t):
713 def _get_decompressor(self, t):
714 try:
714 try:
715 compressor = self._decompressors[t]
715 compressor = self._decompressors[t]
716 except KeyError:
716 except KeyError:
717 try:
717 try:
718 engine = util.compengines.forrevlogheader(t)
718 engine = util.compengines.forrevlogheader(t)
719 compressor = engine.revlogcompressor(self._compengineopts)
719 compressor = engine.revlogcompressor(self._compengineopts)
720 self._decompressors[t] = compressor
720 self._decompressors[t] = compressor
721 except KeyError:
721 except KeyError:
722 raise error.RevlogError(
722 raise error.RevlogError(
723 _(b'unknown compression type %s') % binascii.hexlify(t)
723 _(b'unknown compression type %s') % binascii.hexlify(t)
724 )
724 )
725 return compressor
725 return compressor
726
726
@util.propertycache
def _compressor(self):
    """Compressor built from the configured engine and its options."""
    return util.compengines[self._compengine].revlogcompressor(
        self._compengineopts
    )
731
731
@util.propertycache
def _decompressor(self):
    """the default decompressor

    This is the ``decompress`` callable matching the docket's default
    compression header, or ``None`` when the revlog has no docket.
    """
    docket = self._docket
    if docket is None:
        return None
    header = docket.default_compression_header
    return self._get_decompressor(header).decompress
740
740
741 def _indexfp(self):
741 def _indexfp(self):
742 """file object for the revlog's index file"""
742 """file object for the revlog's index file"""
743 return self.opener(self._indexfile, mode=b"r")
743 return self.opener(self._indexfile, mode=b"r")
744
744
def __index_write_fp(self):
    """Open the index file for writing, positioned where data is appended.

    You should not use this directly and use `_writing` instead.

    Without a docket the handle is positioned at the end of the file,
    otherwise at the docket's ``index_end``.  When the file does not exist
    yet it is created.
    """
    try:
        fp = self.opener(
            self._indexfile, mode=b"r+", checkambig=self._checkambig
        )
        docket = self._docket
        if docket is not None:
            fp.seek(docket.index_end, os.SEEK_SET)
        else:
            fp.seek(0, os.SEEK_END)
    except IOError as inst:
        if inst.errno == errno.ENOENT:
            # no index yet, start a fresh one
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )
        raise
    else:
        return fp
762
762
def __index_new_fp(self):
    """Open a brand new index file through an atomic temporary file.

    You should not use this unless you are upgrading from inline revlog.
    """
    options = {
        'mode': b"w",
        'checkambig': self._checkambig,
        'atomictemp': True,
    }
    return self.opener(self._indexfile, **options)
771
771
772 def _datafp(self, mode=b'r'):
772 def _datafp(self, mode=b'r'):
773 """file object for the revlog's data file"""
773 """file object for the revlog's data file"""
774 return self.opener(self._datafile, mode=mode)
774 return self.opener(self._datafile, mode=mode)
775
775
776 @contextlib.contextmanager
776 @contextlib.contextmanager
777 def _datareadfp(self, existingfp=None):
777 def _datareadfp(self, existingfp=None):
778 """file object suitable to read data"""
778 """file object suitable to read data"""
779 # Use explicit file handle, if given.
779 # Use explicit file handle, if given.
780 if existingfp is not None:
780 if existingfp is not None:
781 yield existingfp
781 yield existingfp
782
782
783 # Use a file handle being actively used for writes, if available.
783 # Use a file handle being actively used for writes, if available.
784 # There is some danger to doing this because reads will seek the
784 # There is some danger to doing this because reads will seek the
785 # file. However, _writeentry() performs a SEEK_END before all writes,
785 # file. However, _writeentry() performs a SEEK_END before all writes,
786 # so we should be safe.
786 # so we should be safe.
787 elif self._writinghandles:
787 elif self._writinghandles:
788 if self._inline:
788 if self._inline:
789 yield self._writinghandles[0]
789 yield self._writinghandles[0]
790 else:
790 else:
791 yield self._writinghandles[1]
791 yield self._writinghandles[1]
792
792
793 # Otherwise open a new file handle.
793 # Otherwise open a new file handle.
794 else:
794 else:
795 if self._inline:
795 if self._inline:
796 func = self._indexfp
796 func = self._indexfp
797 else:
797 else:
798 func = self._datafp
798 func = self._datafp
799 with func() as fp:
799 with func() as fp:
800 yield fp
800 yield fp
801
801
def tiprev(self):
    """Return the highest revision number (-1 when the index is empty)."""
    count = len(self.index)
    return count - 1
804
804
def tip(self):
    """Return the node of the highest revision."""
    last = self.tiprev()
    return self.node(last)
807
807
808 def __contains__(self, rev):
808 def __contains__(self, rev):
809 return 0 <= rev < len(self)
809 return 0 <= rev < len(self)
810
810
811 def __len__(self):
811 def __len__(self):
812 return len(self.index)
812 return len(self.index)
813
813
def __iter__(self):
    """Iterate over all revision numbers, in increasing order."""
    all_revs = pycompat.xrange(len(self))
    return iter(all_revs)
816
816
def revs(self, start=0, stop=None):
    """iterate over all rev in this revlog (from start to stop)"""
    total = len(self)
    return storageutil.iterrevs(total, start=start, stop=stop)
820
820
@property
def nodemap(self):
    """Deprecated accessor for ``self.index.nodemap``.

    A deprecation warning is emitted on every access.
    """
    util.nouideprecwarn(
        b"revlog.nodemap is deprecated, "
        b"use revlog.index.[has_node|rev|get_rev]",
        b'5.3',
        stacklevel=2,
    )
    return self.index.nodemap
829
829
@property
def _nodecache(self):
    """Deprecated accessor for ``self.index.nodemap``.

    A deprecation warning is emitted on every access.
    """
    util.nouideprecwarn(
        b"revlog._nodecache is deprecated, use revlog.index.nodemap",
        b'5.3',
        stacklevel=2,
    )
    return self.index.nodemap
835
835
def hasnode(self, node):
    """Tell whether ``self.rev(node)`` succeeds.

    A ``KeyError`` raised by the lookup means the node is unknown; any
    other exception is propagated.
    """
    try:
        self.rev(node)
    except KeyError:
        return False
    return True
842
842
def candelta(self, baserev, rev):
    """whether two revisions (baserev, rev) can be delta-ed or not"""
    # Disable delta if either rev requires a content-changing flag
    # processor (ex. LFS). This is because such flag processor can alter
    # the rawtext content that the delta will be based on, and two clients
    # could have a same revlog node with different flags (i.e. different
    # rawtext contents) and the delta could be incompatible.
    #
    # The generator keeps the lookup lazy: the flags of ``rev`` are only
    # fetched when ``baserev`` does not already forbid the delta.
    return not any(
        self.flags(r) & REVIDX_RAWTEXT_CHANGING_FLAGS
        for r in (baserev, rev)
    )
855
855
def update_caches(self, transaction):
    """Refresh the persistent nodemap, when this revlog uses one.

    Without a transaction the nodemap is updated right away, otherwise
    the update is set up through ``transaction``.
    """
    if self._nodemap_file is None:
        return
    if transaction is not None:
        nodemaputil.setup_persistent_nodemap(transaction, self)
    else:
        nodemaputil.update_persistent_nodemap(self)
862
862
def clearcaches(self):
    """Drop every in-memory cache, then reload the persistent nodemap.

    The nodemap data is only reloaded for non-inline revlogs that have a
    nodemap file and whose index supports ``update_nodemap_data``.
    """
    self._revisioncache = None
    self._chainbasecache.clear()
    self._chunkcache = (0, b'')
    self._pcache = {}
    self._nodemap_docket = None
    self.index.clearcaches()

    # The python code is the one responsible for validating the docket, we
    # end up having to refresh it here.
    if self._inline or self._nodemap_file is None:
        return
    if not util.safehasattr(self.index, 'update_nodemap_data'):
        return
    nodemap_data = nodemaputil.persisted_data(self)
    if nodemap_data is None:
        return
    self._nodemap_docket = nodemap_data[0]
    self.index.update_nodemap_data(*nodemap_data)
882
882
def rev(self, node):
    """Return the revision number of ``node``.

    Raises ``error.WdirUnsupported`` when ``node`` is one of the working
    directory pseudo nodes and ``error.LookupError`` when it is unknown.
    ``TypeError`` from the index lookup is propagated untouched.
    """
    try:
        return self.index.rev(node)
    except TypeError:
        raise
    except error.RevlogError:
        # parsers.c radix tree lookup failed
        constants = self.nodeconstants
        is_wdir = (
            node == constants.wdirid or node in constants.wdirfilenodeids
        )
        if is_wdir:
            raise error.WdirUnsupported
        raise error.LookupError(node, self.display_id, _(b'no node'))
896
896
897 # Accessors for index entries.
897 # Accessors for index entries.
898
898
899 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
899 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
900 # are flags.
900 # are flags.
def start(self, rev):
    """Return the offset stored for ``rev``.

    The first index field packs the offset above 16 bits of flags.
    """
    offset_flags = self.index[rev][0]
    return int(offset_flags >> 16)
903
903
def flags(self, rev):
    """Return the flags of ``rev``: the low 16 bits of the first field."""
    offset_flags = self.index[rev][0]
    return offset_flags & 0xFFFF
906
906
def length(self, rev):
    """Return the length recorded in the second index field of ``rev``."""
    entry = self.index[rev]
    return entry[1]
909
909
def sidedata_length(self, rev):
    """Return the sidedata length recorded for ``rev``.

    The answer is always 0 when the revlog format has no sidedata.
    """
    if self.hassidedata:
        return self.index[rev][9]
    return 0
914
914
def rawsize(self, rev):
    """return the length of the uncompressed text for a given revision"""
    recorded = self.index[rev][2]
    if recorded < 0:
        # the index holds no usable size, measure the raw data instead
        return len(self.rawdata(rev))
    return recorded
923
923
def size(self, rev):
    """length of non-raw text (processed by a "read" flag processor)"""
    flags = self.flags(rev)
    # note: ELLIPSIS is known to not change the content.
    content_changing = flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS
    if flags & content_changing:
        return len(self.revision(rev, raw=False))
    # fast path: if no "read" flag processor could change the content,
    # size is rawsize.
    return self.rawsize(rev)
933
933
def chainbase(self, rev):
    """Return the revision at the root of the base chain of ``rev``.

    Base pointers (index field 3) are followed until a revision that is
    its own base is found.  The answer is memoized in
    ``self._chainbasecache``.
    """
    cache = self._chainbasecache
    known = cache.get(rev)
    if known is not None:
        return known

    index = self.index
    current = rev
    while True:
        base = index[current][3]
        if base == current:
            break
        current = base

    cache[rev] = base
    return base
948
948
def linkrev(self, rev):
    """Return the link revision stored in index field 4 of ``rev``."""
    entry = self.index[rev]
    return entry[4]
951
951
def parentrevs(self, rev):
    """Return the two parent revisions of ``rev``.

    When the first stored parent is ``nullrev`` the pair is swapped, so a
    null parent never comes first unless both are null.

    Raises ``error.WdirUnsupported`` when ``rev`` is the working directory
    revision; any other ``IndexError`` is propagated.
    """
    try:
        entry = self.index[rev]
    except IndexError:
        if rev == wdirrev:
            raise error.WdirUnsupported
        raise
    first, second = entry[5], entry[6]
    if first == nullrev:
        return second, first
    return first, second

# fast parentrevs(rev) where rev isn't filtered
_uncheckedparentrevs = parentrevs
966
966
def node(self, rev):
    """Return the node stored in index field 7 of ``rev``.

    Raises ``error.WdirUnsupported`` when the lookup fails for the working
    directory revision; any other ``IndexError`` is propagated.
    """
    try:
        entry = self.index[rev]
        return entry[7]
    except IndexError:
        if rev != wdirrev:
            raise
        raise error.WdirUnsupported
974
974
975 # Derived from index values.
975 # Derived from index values.
976
976
def end(self, rev):
    """Return the offset right after the data of ``rev``."""
    begin = self.start(rev)
    return begin + self.length(rev)
979
979
def parents(self, node):
    """Return the nodes of the two parents of ``node``.

    When the first stored parent is null and the second is not, the pair
    is swapped, matching what ``parentrevs`` does with revision numbers.

    The previous implementation compared the parent *revision number*
    (index field 5) with ``self.nullid``, a *node id*; the two never
    compare equal, so the swap was dead code.  The null check is now done
    on the parent's node.
    """
    index = self.index
    entry = index[self.rev(node)]
    # inline node() to avoid function call overhead
    p1node = index[entry[5]][7]
    p2node = index[entry[6]][7]
    if p1node == self.nullid:
        return p2node, p1node
    return p1node, p2node
988
988
def chainlen(self, rev):
    """Return the chain length of ``rev``: first item of ``_chaininfo``."""
    length, _unused_size = self._chaininfo(rev)[:2]
    return length
991
991
992 def _chaininfo(self, rev):
992 def _chaininfo(self, rev):
993 chaininfocache = self._chaininfocache
993 chaininfocache = self._chaininfocache
994 if rev in chaininfocache:
994 if rev in chaininfocache:
995 return chaininfocache[rev]
995 return chaininfocache[rev]
996 index = self.index
996 index = self.index
997 generaldelta = self._generaldelta
997 generaldelta = self._generaldelta
998 iterrev = rev
998 iterrev = rev
999 e = index[iterrev]
999 e = index[iterrev]
1000 clen = 0
1000 clen = 0
1001 compresseddeltalen = 0
1001 compresseddeltalen = 0
1002 while iterrev != e[3]:
1002 while iterrev != e[3]:
1003 clen += 1
1003 clen += 1
1004 compresseddeltalen += e[1]
1004 compresseddeltalen += e[1]
1005 if generaldelta:
1005 if generaldelta:
1006 iterrev = e[3]
1006 iterrev = e[3]
1007 else:
1007 else:
1008 iterrev -= 1
1008 iterrev -= 1
1009 if iterrev in chaininfocache:
1009 if iterrev in chaininfocache:
1010 t = chaininfocache[iterrev]
1010 t = chaininfocache[iterrev]
1011 clen += t[0]
1011 clen += t[0]
1012 compresseddeltalen += t[1]
1012 compresseddeltalen += t[1]
1013 break
1013 break
1014 e = index[iterrev]
1014 e = index[iterrev]
1015 else:
1015 else:
1016 # Add text length of base since decompressing that also takes
1016 # Add text length of base since decompressing that also takes
1017 # work. For cache hits the length is already included.
1017 # work. For cache hits the length is already included.
1018 compresseddeltalen += e[1]
1018 compresseddeltalen += e[1]
1019 r = (clen, compresseddeltalen)
1019 r = (clen, compresseddeltalen)
1020 chaininfocache[rev] = r
1020 chaininfocache[rev] = r
1021 return r
1021 return r
1022
1022
def _deltachain(self, rev, stoprev=None):
    """Return the delta chain leading to ``rev``.

    ``stoprev``, when given, is a revision at which the walk halts early;
    without it the walk continues down to the base of the chain.

    The result is a ``(chain, stopped)`` pair: ``chain`` lists the
    revisions in ascending order and ``stopped`` tells whether the walk
    ended because ``stoprev`` was reached (``stoprev`` itself is then not
    part of ``chain``).
    """
    try:
        # Use the index's own ``deltachain`` when it provides one.
        return self.index.deltachain(rev, stoprev, self._generaldelta)
    except AttributeError:
        pass

    # Local aliases keep attribute lookups out of the loop below.
    entries = self.index
    followbase = self._generaldelta

    walked = []
    current = rev
    entry = entries[current]
    # A revision whose entry[3] is the revision itself ends the chain.
    while not (current == entry[3] or current == stoprev):
        walked.append(current)
        # With generaldelta the next link is entry[3]; otherwise it is
        # simply the previous revision number.
        current = entry[3] if followbase else current - 1
        entry = entries[current]

    stopped = current == stoprev
    if not stopped:
        walked.append(current)

    # Revisions were collected from ``rev`` downward; return them ascending.
    return walked[::-1], stopped
1063
1063
def ancestors(self, revs, stoprev=0, inclusive=False):
    """Generate the ancestors of ``revs`` in reverse revision order.

    Revisions lower than ``stoprev`` are not generated.

    See the documentation of ancestor.lazyancestors for more details."""
    startrevs = list(revs)

    # Looking up the node of every starting revision makes sure none of
    # them is filtered; their ancestors are then known to be unfiltered
    # as well.
    lookup = self.node
    for startrev in startrevs:
        lookup(startrev)

    if rustancestor is None or not self.index.rust_ext_compat:
        return ancestor.lazyancestors(
            self._uncheckedparentrevs,
            startrevs,
            stoprev=stoprev,
            inclusive=inclusive,
        )
    return rustancestor.LazyAncestors(
        self.index, startrevs, stoprev=stoprev, inclusive=inclusive
    )
1084
1084
def descendants(self, revs):
    """Delegate to ``dagop.descendantrevs`` for the revisions ``revs``.

    The helper receives ``revs`` together with this revlog's ``revs`` and
    ``parentrevs`` callables; its result is returned unchanged.
    """
    iterrevs, parentsof = self.revs, self.parentrevs
    return dagop.descendantrevs(revs, iterrevs, parentsof)
1087
1087
def findcommonmissing(self, common=None, heads=None):
    """Return the ancestors of common, and the ancestors of heads that are
    not ancestors of common. In revset terminology, the returned tuple is:

      ::common, (::heads) - (::common)

    The second element is a list of nodes sorted by revision number,
    meaning it is topologically sorted. The first element is a lazy,
    set-like object holding revision numbers.

    'heads' and 'common' are both lists of node IDs.  If heads is
    not supplied, uses all of the revlog's heads.  If common is not
    supplied, uses nullid."""
    commonnodes = [self.nullid] if common is None else common
    headnodes = self.heads() if heads is None else heads

    commonrevs = [self.rev(node) for node in commonnodes]
    headrevs = [self.rev(node) for node in headnodes]

    # Set-like wrapper: explicitly added values plus a lazily evaluated
    # collection, so the ancestors can be made "inclusive" cheaply.
    class lazyset(object):
        def __init__(self, lazyvalues):
            self.addedvalues = set()
            self.lazyvalues = lazyvalues

        def __contains__(self, value):
            if value in self.addedvalues:
                return True
            return value in self.lazyvalues

        def __iter__(self):
            eager = self.addedvalues
            for value in eager:
                yield value
            # skip lazy values that were already produced above
            for value in self.lazyvalues:
                if value not in eager:
                    yield value

        def add(self, value):
            self.addedvalues.add(value)

        def update(self, values):
            self.addedvalues.update(values)

    has = lazyset(self.ancestors(commonrevs))
    has.add(nullrev)
    has.update(commonrevs)

    # Breadth-first walk from the heads, collecting everything not in has.
    missing = set()
    pending = collections.deque(r for r in headrevs if r not in has)
    while pending:
        rev = pending.popleft()
        if rev in missing:
            continue
        missing.add(rev)
        pending.extend(p for p in self.parentrevs(rev) if p not in has)

    return has, [self.node(rev) for rev in sorted(missing)]
1151
1151
def incrementalmissingrevs(self, common=None):
    """Return an object that can be used to incrementally compute the
    revision numbers of the ancestors of arbitrary sets that are not
    ancestors of common.

    The object comes from ``rustancestor.MissingAncestors`` when the Rust
    module is present and the index reports ``rust_ext_compat``, and from
    ``ancestor.incrementalmissingancestors`` otherwise.

    'common' is a list of revision numbers. If common is not supplied, uses
    nullrev.
    """
    bases = [nullrev] if common is None else common

    userust = rustancestor is not None and self.index.rust_ext_compat
    if not userust:
        return ancestor.incrementalmissingancestors(self.parentrevs, bases)
    return rustancestor.MissingAncestors(self.index, bases)
1167
1167
def findmissingrevs(self, common=None, heads=None):
    """Return the revision numbers of the ancestors of heads that
    are not ancestors of common.

    More specifically, return a list of revision numbers corresponding to
    nodes N such that every N satisfies the following constraints:

      1. N is an ancestor of some node in 'heads'
      2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of revision numbers.  If heads is
    not supplied, uses all of the revlog's head revisions.  If common is
    not supplied, uses nullrev."""
    bases = [nullrev] if common is None else common
    tips = self.headrevs() if heads is None else heads
    return self.incrementalmissingrevs(common=bases).missingancestors(tips)
1191
1191
def findmissing(self, common=None, heads=None):
    """Return the ancestors of heads that are not ancestors of common.

    More specifically, return a list of nodes N such that every N
    satisfies the following constraints:

      1. N is an ancestor of some node in 'heads'
      2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs.  If heads is
    not supplied, uses all of the revlog's heads.  If common is not
    supplied, uses nullid."""
    commonnodes = [self.nullid] if common is None else common
    headnodes = self.heads() if heads is None else heads

    # The incremental helper works on revision numbers, not nodes.
    commonrevs = [self.rev(node) for node in commonnodes]
    headrevs = [self.rev(node) for node in headnodes]

    tracker = self.incrementalmissingrevs(common=commonrevs)
    return [self.node(rev) for rev in tracker.missingancestors(headrevs)]
1217
1217
def nodesbetween(self, roots=None, heads=None):
    """Return a topological path from 'roots' to 'heads'.

    Return a tuple (nodes, outroots, outheads) where 'nodes' is a
    topologically sorted list of all nodes N that satisfy both of
    these constraints:

      1. N is a descendant of some node in 'roots'
      2. N is an ancestor of some node in 'heads'

    Every node is considered to be both a descendant and an ancestor
    of itself, so every reachable node in 'roots' and 'heads' will be
    included in 'nodes'.

    'outroots' is the list of reachable nodes in 'roots', i.e., the
    subset of 'roots' that is returned in 'nodes'.  Likewise,
    'outheads' is the subset of 'heads' that is also in 'nodes'.

    'roots' and 'heads' are both lists of node IDs.  If 'roots' is
    unspecified, uses nullid as the only root.  If 'heads' is
    unspecified, uses list of all of the revlog's heads.

    When nothing qualifies (an empty 'roots' or 'heads' argument, or no
    root that is an ancestor of a head) the result is three empty lists."""
    # Shared "nothing found" result.
    nonodes = ([], [], [])
    if roots is not None:
        roots = list(roots)
        if not roots:
            return nonodes
        lowestrev = min([self.rev(n) for n in roots])
    else:
        roots = [self.nullid]  # Everybody's a descendant of nullid
        lowestrev = nullrev
    if (lowestrev == nullrev) and (heads is None):
        # We want _all_ the nodes!
        return (
            [self.node(r) for r in self],
            [self.nullid],
            list(self.heads()),
        )
    if heads is None:
        # All nodes are ancestors, so the latest ancestor is the last
        # node.
        highestrev = len(self) - 1
        # Set ancestors to None to signal that every node is an ancestor.
        ancestors = None
        # Set heads to an empty dictionary for later discovery of heads
        heads = {}
    else:
        heads = list(heads)
        if not heads:
            return nonodes
        ancestors = set()
        # Turn heads into a dictionary so we can remove 'fake' heads.
        # Also, later we will be using it to filter out the heads we can't
        # find from roots.
        heads = dict.fromkeys(heads, False)
        # Start at the top and keep marking parents until we're done.
        nodestotag = set(heads)
        # Remember where the top was so we can use it as a limit later.
        highestrev = max([self.rev(n) for n in nodestotag])
        while nodestotag:
            # grab a node to tag
            n = nodestotag.pop()
            # Never tag nullid
            if n == self.nullid:
                continue
            # A node's revision number represents its place in a
            # topologically sorted list of nodes.
            r = self.rev(n)
            # Nodes below the lowest root cannot descend from any root,
            # so they (and their parents) are not walked.
            if r >= lowestrev:
                if n not in ancestors:
                    # If we are possibly a descendant of one of the roots
                    # and we haven't already been marked as an ancestor
                    ancestors.add(n)  # Mark as ancestor
                    # Add non-nullid parents to list of nodes to tag.
                    nodestotag.update(
                        [p for p in self.parents(n) if p != self.nullid]
                    )
                elif n in heads:  # We've seen it before, is it a fake head?
                    # So it is, real heads should not be the ancestors of
                    # any other heads.
                    heads.pop(n)
        if not ancestors:
            return nonodes
        # Now that we have our set of ancestors, we want to remove any
        # roots that are not ancestors.

        # If one of the roots was nullid, everything is included anyway.
        if lowestrev > nullrev:
            # But, since we weren't, let's recompute the lowest rev to not
            # include roots that aren't ancestors.

            # Filter out roots that aren't ancestors of heads
            roots = [root for root in roots if root in ancestors]
            # Recompute the lowest revision
            if roots:
                lowestrev = min([self.rev(root) for root in roots])
            else:
                # No more roots?  Return empty list
                return nonodes
        else:
            # We are descending from nullid, and don't need to care about
            # any other roots.
            lowestrev = nullrev
            roots = [self.nullid]
    # Transform our roots list into a set.
    descendants = set(roots)
    # Also, keep the original roots so we can filter out roots that aren't
    # 'real' roots (i.e. are descended from other roots).
    roots = descendants.copy()
    # Our topologically sorted list of output nodes.
    orderedout = []
    # Don't start at nullid since we don't want nullid in our output list,
    # and if nullid shows up in descendants, empty parents will look like
    # they're descendants.
    for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
        n = self.node(r)
        isdescendant = False
        if lowestrev == nullrev:  # Everybody is a descendant of nullid
            isdescendant = True
        elif n in descendants:
            # n is already a descendant
            isdescendant = True
            # This check only needs to be done here because all the roots
            # are already marked as descendants before the loop starts.
            if n in roots:
                # If n was a root, check if it's a 'real' root.
                p = tuple(self.parents(n))
                # If any of its parents are descendants, it's not a root.
                if (p[0] in descendants) or (p[1] in descendants):
                    roots.remove(n)
        else:
            p = tuple(self.parents(n))
            # A node is a descendant if either of its parents are
            # descendants.  (We seeded the descendants set with the roots
            # up there, remember?)
            if (p[0] in descendants) or (p[1] in descendants):
                descendants.add(n)
                isdescendant = True
        if isdescendant and ((ancestors is None) or (n in ancestors)):
            # Only include nodes that are both descendants and ancestors.
            orderedout.append(n)
            if (ancestors is not None) and (n in heads):
                # We're trying to figure out which heads are reachable
                # from roots.
                # Mark this head as having been reached
                heads[n] = True
            elif ancestors is None:
                # Otherwise, we're trying to discover the heads.
                # Assume this is a head because if it isn't, the next step
                # will eventually remove it.
                heads[n] = True
                # But, obviously its parents aren't.
                for p in self.parents(n):
                    heads.pop(p, None)
    # Keep only the heads whose flag was set to True in the loop above.
    heads = [head for head, flag in pycompat.iteritems(heads) if flag]
    roots = list(roots)
    assert orderedout
    assert roots
    assert heads
    return (orderedout, roots, heads)
1377
1377
def headrevs(self, revs=None):
    """Return head revisions, optionally restricted to ``revs``.

    Without ``revs`` the index's own ``headrevs`` is used, falling back to
    ``self._headrevs()`` when the index has no such method.  With ``revs``
    the work is delegated to ``rustdagop.headrevs`` (when the Rust module
    is present and the index reports ``rust_ext_compat``) or to
    ``dagop.headrevs``.
    """
    if revs is not None:
        if rustdagop is None or not self.index.rust_ext_compat:
            return dagop.headrevs(revs, self._uncheckedparentrevs)
        return rustdagop.headrevs(self.index, revs)

    try:
        return self.index.headrevs()
    except AttributeError:
        return self._headrevs()
1387
1387
def computephases(self, roots):
    """Forward ``roots`` to the index's ``computephasesmapsets``.

    The index's result is returned unchanged.
    """
    compute = self.index.computephasesmapsets
    return compute(roots)
1390
1390
def _headrevs(self):
    """Compute the head revisions by scanning the index in pure Python.

    Returns ``[nullrev]`` for an empty revlog, otherwise the ascending list
    of revisions that no iterated revision names as a parent.
    """
    total = len(self)
    if total == 0:
        return [nullrev]

    # Nobody is a head at start; only revisions we iterate over (filtered
    # ones are not iterated) can become one.  The list has one spare slot
    # at the end -- presumably so a null parent (-1) lands there instead of
    # on a real revision.
    flags = [False] * (total + 1)
    entries = self.index
    for rev in self:
        flags[rev] = True  # candidate head until something points at it
        entry = entries[rev]
        # entry[5] and entry[6] are the two parents: they are not heads
        flags[entry[5]] = False
        flags[entry[6]] = False
    return [rev for rev, ishead in enumerate(flags) if ishead]
1403
1403
def heads(self, start=None, stop=None):
    """return the list of all nodes that have no children

    if start is specified, only heads that are descendants of
    start will be returned
    if stop is specified, it will consider all the revs from stop
    as if they had no children

    With neither argument, an empty revlog yields ``[self.nullid]``.
    """
    if start is None and stop is None:
        if len(self) == 0:
            return [self.nullid]
        return [self.node(rev) for rev in self.headrevs()]

    startrev = nullrev if start is None else self.rev(start)
    stoprevs = set(self.rev(node) for node in (stop or []))

    found = dagop.headrevssubset(
        self.revs, self.parentrevs, startrev=startrev, stoprevs=stoprevs
    )
    return [self.node(rev) for rev in found]
1429
1429
def children(self, node):
    """find the children of a given node

    Only revisions after the node's own revision are examined.  A revision
    with no non-null parent counts as a child only when ``node`` resolves
    to nullrev.
    """
    target = self.rev(node)
    found = []
    for rev in self.revs(start=target + 1):
        realparents = [p for p in self.parentrevs(rev) if p != nullrev]
        if not realparents:
            # parentless revision: a child of the null revision only
            if target == nullrev:
                found.append(self.node(rev))
            continue
        for parent in realparents:
            if parent == target:
                found.append(self.node(rev))
    return found
1443
1443
def commonancestorsheads(self, a, b):
    """calculate all the heads of the common ancestors of nodes a and b

    The nodes are converted to revisions, handed to
    ``_commonancestorsheads``, and the resulting revisions are mapped back
    to nodes.
    """
    reva = self.rev(a)
    revb = self.rev(b)
    headrevs = self._commonancestorsheads(reva, revb)
    return pycompat.maplist(self.node, headrevs)
1449
1449
def _commonancestorsheads(self, *revs):
    """calculate all the heads of the common ancestors of revs

    The index's own ``commonancestorsheads`` is tried first; if it is
    missing or overflows, ``ancestor.commonancestorsheads`` is used.
    """
    try:
        return self.index.commonancestorsheads(*revs)
    except (AttributeError, OverflowError):
        # the index implementation is unavailable or failed
        return ancestor.commonancestorsheads(self.parentrevs, *revs)
1457
1457
def isancestor(self, a, b):
    """return True if node a is an ancestor of node b

    A revision is considered an ancestor of itself.

    Both nodes are converted to revisions and the answer comes from
    ``isancestorrev``."""
    reva = self.rev(a)
    revb = self.rev(b)
    return self.isancestorrev(reva, revb)
1464
1464
def isancestorrev(self, a, b):
    """return True if revision a is an ancestor of revision b

    A revision is considered an ancestor of itself.

    The implementation of this is trivial but the use of
    reachableroots is not."""
    # nullrev and the revision itself always count as ancestors
    if a == nullrev or a == b:
        return True
    # a higher revision number is never reported as an ancestor
    if a > b:
        return False
    reached = self.reachableroots(a, [b], [a], includepath=False)
    return bool(reached)
1479
1479
def reachableroots(self, minroot, heads, roots, includepath=False):
    """return (heads(::(<roots> and <roots>::<heads>)))

    If includepath is True, return (<roots>::<heads>).

    The index's ``reachableroots2`` is used when available, otherwise
    ``dagop._reachablerootspure``; note that the two take ``heads`` and
    ``roots`` in opposite order."""
    try:
        native = self.index.reachableroots2
        return native(minroot, heads, roots, includepath)
    except AttributeError:
        pure = dagop._reachablerootspure
        return pure(self.parentrevs, minroot, roots, heads, includepath)
1492
1492
def ancestor(self, a, b):
    """calculate the "best" common ancestor of nodes a and b

    Returns ``self.nullid`` when no common ancestor is found.  When several
    candidates tie, the smallest node is returned so the choice is
    consistent."""
    reva = self.rev(a)
    revb = self.rev(b)
    try:
        candidates = self.index.ancestors(reva, revb)
    except (AttributeError, OverflowError):
        # index implementation unavailable or failed: use the ancestor module
        candidates = ancestor.ancestors(self.parentrevs, reva, revb)

    if not candidates:
        return self.nullid
    tonode = self.node
    return min(tonode(rev) for rev in candidates)
1505
1505
def _match(self, id):
    """Resolve ``id`` to a node using exact matches only.

    ``id`` is tried, in order, as an integer revision, as a binary node
    (when its length equals ``nodeconstants.nodelen``), as the decimal text
    of a revision number (negative values count from the end), and as a
    full hexadecimal node id.  Returns the node, or None when nothing
    matches.
    """
    if isinstance(id, int):
        # already a revision number
        return self.node(id)

    nodelen = self.nodeconstants.nodelen
    if len(id) == nodelen:
        # possibly a binary node
        # odds of a binary node being all hex in ASCII are 1 in 10**25
        try:
            self.rev(id)  # quick search the index
            return id
        except error.LookupError:
            pass  # may be partial hex id

    try:
        # decimal text of a revision number
        number = int(id)
        if id != b"%d" % number:
            # reject spellings that do not round-trip (e.g. leading zeros)
            raise ValueError
        if number < 0:
            number += len(self)
        if not (0 <= number < len(self)):
            raise ValueError
        return self.node(number)
    except (ValueError, OverflowError):
        pass

    if len(id) == 2 * nodelen:
        try:
            # a full hex nodeid?
            candidate = bin(id)
            self.rev(candidate)
            return candidate
        except (TypeError, error.LookupError):
            pass
    return None
1539
1539
def _partialmatch(self, id):
    """Resolve a hex prefix ``id`` to a node.

    The index's ``partialmatch`` is consulted first. When it is not
    available, rejects the key, or reports several matches while
    ``filteredrevs`` is set, the ``_pcache`` prefix cache and then a
    linear scan of the index are used instead.

    Returns the matching node, or None when nothing matches.

    Raises ``error.AmbiguousPrefixLookupError`` when the prefix designates
    more than one candidate (including a clash with the working directory
    pseudo-identifier) and ``error.WdirUnsupported`` when the prefix only
    matches the working directory pseudo-identifier.
    """
    # we don't care wdirfilenodeids as they should be always full hash
    maybewdir = self.nodeconstants.wdirhex.startswith(id)
    ambiguous = False
    try:
        found = self.index.partialmatch(id)
        if not (found and self.hasnode(found)):
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        if not maybewdir:
            return found
        # single 'ff...' match in radix tree, ambiguous with wdir
        ambiguous = True
    except error.RevlogError:
        # parsers.c radix tree lookup gave multiple matches
        # fast path: for unfiltered changelog, radix tree is accurate
        if not getattr(self, 'filteredrevs', None):
            ambiguous = True
        # fall through to slow path that filters hidden revisions
    except (AttributeError, ValueError):
        # we are pure python, or key was too short to search radix tree
        pass

    if ambiguous:
        raise error.AmbiguousPrefixLookupError(
            id, self.display_id, _(b'ambiguous identifier')
        )

    if id in self._pcache:
        return self._pcache[id]

    if len(id) <= 40:
        try:
            # hex(node)[:...]
            evenlen = (len(id) // 2) * 2  # grab an even number of digits
            binprefix = bin(id[:evenlen])
            # first narrow down on the binary prefix, then check the full
            # hex prefix and drop nodes this revlog does not expose
            candidates = [
                e[7] for e in self.index if e[7].startswith(binprefix)
            ]
            candidates = [
                n
                for n in candidates
                if hex(n).startswith(id) and self.hasnode(n)
            ]
            if self.nodeconstants.nullhex.startswith(id):
                candidates.append(self.nullid)
            if len(candidates) > 0:
                if len(candidates) == 1 and not maybewdir:
                    self._pcache[id] = candidates[0]
                    return candidates[0]
                raise error.AmbiguousPrefixLookupError(
                    id, self.display_id, _(b'ambiguous identifier')
                )
            if maybewdir:
                raise error.WdirUnsupported
            return None
        except TypeError:
            # ``id`` could not be decoded as hexadecimal
            pass
    return None
1597
1597
def lookup(self, id):
    """Locate a node from a user-supplied identifier.

    ``id`` may be:
    - a revision number or str(revision number)
    - a nodeid or a prefix of a hex nodeid

    Exact readings (``_match``) take precedence over prefix matching
    (``_partialmatch``). Raises ``error.LookupError`` when neither finds
    anything.
    """
    node = self._match(id)
    if node is None:
        node = self._partialmatch(id)
        if not node:
            raise error.LookupError(
                id, self.display_id, _(b'no match found')
            )
    return node
1611
1611
def shortest(self, node, minlength=1):
    """Find the shortest unambiguous hex prefix that matches ``node``.

    The returned prefix is at least ``minlength`` characters long. For
    any node other than the working directory pseudo-identifier, the
    prefix is additionally lengthened until it is no longer made only of
    ``f`` characters, so it cannot be confused with that identifier.

    When ``filteredrevs`` is unset or empty, the index's ``shortest``
    method is used if it exists; otherwise prefixes are tested one by one
    through ``_partialmatch``.

    Raises ``error.LookupError`` when ``node`` is unknown.
    """
    hexnode = hex(node)

    def resolves(prefix):
        """tell whether ``prefix`` designates exactly one candidate"""
        try:
            found = self._partialmatch(prefix)
        except error.AmbiguousPrefixLookupError:
            return False
        except error.WdirUnsupported:
            # single 'ff...' match
            return True
        if found is None:
            raise error.LookupError(node, self.display_id, _(b'no node'))
        return True

    def onlyf(prefix):
        """tell whether ``prefix`` could be a prefix of the wdir id"""
        for char in pycompat.iterbytestr(prefix):
            if char != b'f':
                return False
        return True

    def clearofwdir(fullhex, startlength):
        """Disambiguate against wdirid."""
        for size in range(startlength, len(fullhex) + 1):
            candidate = fullhex[:size]
            if not onlyf(candidate):
                return candidate

    if not getattr(self, 'filteredrevs', None):
        try:
            size = max(self.index.shortest(node), minlength)
            return clearofwdir(hexnode, size)
        except error.RevlogError:
            if node != self.nodeconstants.wdirid:
                raise error.LookupError(
                    node, self.display_id, _(b'no node')
                )
        except AttributeError:
            # Fall through to pure code
            pass

    if node == self.nodeconstants.wdirid:
        # the wdir id itself needs no extra disambiguation
        for size in range(minlength, len(hexnode) + 1):
            candidate = hexnode[:size]
            if resolves(candidate):
                return candidate

    for size in range(minlength, len(hexnode) + 1):
        candidate = hexnode[:size]
        if resolves(candidate):
            return clearofwdir(hexnode, size)
1662
1662
def cmp(self, node, text):
    """Compare ``text`` with the stored revision identified by ``node``.

    The comparison is done on hashes: ``text`` is hashed together with
    the parents of ``node`` and the result is checked against ``node``.

    Returns True if text is different than what is stored.
    """
    parent1, parent2 = self.parents(node)
    computed = storageutil.hashrevisionsha1(text, parent1, parent2)
    return computed != node
1670
1670
1671 def _cachesegment(self, offset, data):
1671 def _cachesegment(self, offset, data):
1672 """Add a segment to the revlog cache.
1672 """Add a segment to the revlog cache.
1673
1673
1674 Accepts an absolute offset and the data that is at that location.
1674 Accepts an absolute offset and the data that is at that location.
1675 """
1675 """
1676 o, d = self._chunkcache
1676 o, d = self._chunkcache
1677 # try to add to existing cache
1677 # try to add to existing cache
1678 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1678 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1679 self._chunkcache = o, d + data
1679 self._chunkcache = o, d + data
1680 else:
1680 else:
1681 self._chunkcache = offset, data
1681 self._chunkcache = offset, data
1682
1682
def _readsegment(self, offset, length, df=None):
    """Read a segment of raw data from the revlog file.

    Accepts an absolute offset, the number of bytes wanted, and an
    optional existing file handle to read from.

    If an existing file handle is passed, it will be seeked and the
    original seek position will NOT be restored.

    The window actually read is aligned on ``self._chunkcachesize`` and
    handed to ``_cachesegment`` before the requested part is returned.

    Returns a str or buffer of raw byte data.

    Raises ``error.RevlogError`` if the requested number of bytes could
    not be read.
    """
    # Cache data both forward and backward around the requested
    # data, in a fixed size window. This helps speed up operations
    # involving reading the revlog backwards.
    cachesize = self._chunkcachesize
    alignmask = ~(cachesize - 1)
    realoffset = offset & alignmask
    reallength = ((offset + length + cachesize) & alignmask) - realoffset
    with self._datareadfp(df) as df:
        df.seek(realoffset)
        window = df.read(reallength)

    self._cachesegment(realoffset, window)

    # position of the requested data inside the window that was read
    startoffset = offset - realoffset
    available = len(window) - startoffset
    if available < length:
        raise error.RevlogError(
            _(
                b'partial read of revlog %s; expected %d bytes from '
                b'offset %d, got %d'
            )
            % (
                self._indexfile if self._inline else self._datafile,
                length,
                offset,
                available,
            )
        )

    if offset != realoffset or reallength != length:
        # only a slice of the window was asked for
        return util.buffer(window, startoffset, length)
    return window
1742
1742
1743 def _getsegment(self, offset, length, df=None):
1743 def _getsegment(self, offset, length, df=None):
1744 """Obtain a segment of raw data from the revlog.
1744 """Obtain a segment of raw data from the revlog.
1745
1745
1746 Accepts an absolute offset, length of bytes to obtain, and an
1746 Accepts an absolute offset, length of bytes to obtain, and an
1747 optional file handle to the already-opened revlog. If the file
1747 optional file handle to the already-opened revlog. If the file
1748 handle is used, it's original seek position will not be preserved.
1748 handle is used, it's original seek position will not be preserved.
1749
1749
1750 Requests for data may be returned from a cache.
1750 Requests for data may be returned from a cache.
1751
1751
1752 Returns a str or a buffer instance of raw byte data.
1752 Returns a str or a buffer instance of raw byte data.
1753 """
1753 """
1754 o, d = self._chunkcache
1754 o, d = self._chunkcache
1755 l = len(d)
1755 l = len(d)
1756
1756
1757 # is it in the cache?
1757 # is it in the cache?
1758 cachestart = offset - o
1758 cachestart = offset - o
1759 cacheend = cachestart + length
1759 cacheend = cachestart + length
1760 if cachestart >= 0 and cacheend <= l:
1760 if cachestart >= 0 and cacheend <= l:
1761 if cachestart == 0 and cacheend == l:
1761 if cachestart == 0 and cacheend == l:
1762 return d # avoid a copy
1762 return d # avoid a copy
1763 return util.buffer(d, cachestart, cacheend - cachestart)
1763 return util.buffer(d, cachestart, cacheend - cachestart)
1764
1764
1765 return self._readsegment(offset, length, df=df)
1765 return self._readsegment(offset, length, df=df)
1766
1766
1767 def _getsegmentforrevs(self, startrev, endrev, df=None):
1767 def _getsegmentforrevs(self, startrev, endrev, df=None):
1768 """Obtain a segment of raw data corresponding to a range of revisions.
1768 """Obtain a segment of raw data corresponding to a range of revisions.
1769
1769
1770 Accepts the start and end revisions and an optional already-open
1770 Accepts the start and end revisions and an optional already-open
1771 file handle to be used for reading. If the file handle is read, its
1771 file handle to be used for reading. If the file handle is read, its
1772 seek position will not be preserved.
1772 seek position will not be preserved.
1773
1773
1774 Requests for data may be satisfied by a cache.
1774 Requests for data may be satisfied by a cache.
1775
1775
1776 Returns a 2-tuple of (offset, data) for the requested range of
1776 Returns a 2-tuple of (offset, data) for the requested range of
1777 revisions. Offset is the integer offset from the beginning of the
1777 revisions. Offset is the integer offset from the beginning of the
1778 revlog and data is a str or buffer of the raw byte data.
1778 revlog and data is a str or buffer of the raw byte data.
1779
1779
1780 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1780 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1781 to determine where each revision's data begins and ends.
1781 to determine where each revision's data begins and ends.
1782 """
1782 """
1783 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1783 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1784 # (functions are expensive).
1784 # (functions are expensive).
1785 index = self.index
1785 index = self.index
1786 istart = index[startrev]
1786 istart = index[startrev]
1787 start = int(istart[0] >> 16)
1787 start = int(istart[0] >> 16)
1788 if startrev == endrev:
1788 if startrev == endrev:
1789 end = start + istart[1]
1789 end = start + istart[1]
1790 else:
1790 else:
1791 iend = index[endrev]
1791 iend = index[endrev]
1792 end = int(iend[0] >> 16) + iend[1]
1792 end = int(iend[0] >> 16) + iend[1]
1793
1793
1794 if self._inline:
1794 if self._inline:
1795 start += (startrev + 1) * self.index.entry_size
1795 start += (startrev + 1) * self.index.entry_size
1796 end += (endrev + 1) * self.index.entry_size
1796 end += (endrev + 1) * self.index.entry_size
1797 length = end - start
1797 length = end - start
1798
1798
1799 return start, self._getsegment(start, length, df=df)
1799 return start, self._getsegment(start, length, df=df)
1800
1800
def _chunk(self, rev, df=None):
    """Obtain a single decompressed chunk for a revision.

    Accepts an integer revision and an optional already-open file handle
    to be used for reading. If used, the seek position of the file will not
    be preserved.

    The compression mode stored in the index entry (field 10) selects how
    the raw segment is turned into data: returned untouched, passed to
    ``self._decompressor`` or passed to ``self.decompress``.

    Returns a str holding uncompressed data for the requested revision.

    Raises ``error.RevlogError`` when the compression mode is unknown.
    """
    compression_mode = self.index[rev][10]
    data = self._getsegmentforrevs(rev, rev, df=df)[1]
    if compression_mode == COMP_MODE_PLAIN:
        return data
    elif compression_mode == COMP_MODE_DEFAULT:
        return self._decompressor(data)
    elif compression_mode == COMP_MODE_INLINE:
        return self.decompress(data)
    else:
        # error messages are bytes everywhere else in this module
        msg = b'unknown compression mode %d'
        msg %= compression_mode
        raise error.RevlogError(msg)
1822
1822
1823 def _chunks(self, revs, df=None, targetsize=None):
1823 def _chunks(self, revs, df=None, targetsize=None):
1824 """Obtain decompressed chunks for the specified revisions.
1824 """Obtain decompressed chunks for the specified revisions.
1825
1825
1826 Accepts an iterable of numeric revisions that are assumed to be in
1826 Accepts an iterable of numeric revisions that are assumed to be in
1827 ascending order. Also accepts an optional already-open file handle
1827 ascending order. Also accepts an optional already-open file handle
1828 to be used for reading. If used, the seek position of the file will
1828 to be used for reading. If used, the seek position of the file will
1829 not be preserved.
1829 not be preserved.
1830
1830
1831 This function is similar to calling ``self._chunk()`` multiple times,
1831 This function is similar to calling ``self._chunk()`` multiple times,
1832 but is faster.
1832 but is faster.
1833
1833
1834 Returns a list with decompressed data for each requested revision.
1834 Returns a list with decompressed data for each requested revision.
1835 """
1835 """
1836 if not revs:
1836 if not revs:
1837 return []
1837 return []
1838 start = self.start
1838 start = self.start
1839 length = self.length
1839 length = self.length
1840 inline = self._inline
1840 inline = self._inline
1841 iosize = self.index.entry_size
1841 iosize = self.index.entry_size
1842 buffer = util.buffer
1842 buffer = util.buffer
1843
1843
1844 l = []
1844 l = []
1845 ladd = l.append
1845 ladd = l.append
1846
1846
1847 if not self._withsparseread:
1847 if not self._withsparseread:
1848 slicedchunks = (revs,)
1848 slicedchunks = (revs,)
1849 else:
1849 else:
1850 slicedchunks = deltautil.slicechunk(
1850 slicedchunks = deltautil.slicechunk(
1851 self, revs, targetsize=targetsize
1851 self, revs, targetsize=targetsize
1852 )
1852 )
1853
1853
1854 for revschunk in slicedchunks:
1854 for revschunk in slicedchunks:
1855 firstrev = revschunk[0]
1855 firstrev = revschunk[0]
1856 # Skip trailing revisions with empty diff
1856 # Skip trailing revisions with empty diff
1857 for lastrev in revschunk[::-1]:
1857 for lastrev in revschunk[::-1]:
1858 if length(lastrev) != 0:
1858 if length(lastrev) != 0:
1859 break
1859 break
1860
1860
1861 try:
1861 try:
1862 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1862 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1863 except OverflowError:
1863 except OverflowError:
1864 # issue4215 - we can't cache a run of chunks greater than
1864 # issue4215 - we can't cache a run of chunks greater than
1865 # 2G on Windows
1865 # 2G on Windows
1866 return [self._chunk(rev, df=df) for rev in revschunk]
1866 return [self._chunk(rev, df=df) for rev in revschunk]
1867
1867
1868 decomp = self.decompress
1868 decomp = self.decompress
1869 # self._decompressor might be None, but will not be used in that case
1869 # self._decompressor might be None, but will not be used in that case
1870 def_decomp = self._decompressor
1870 def_decomp = self._decompressor
1871 for rev in revschunk:
1871 for rev in revschunk:
1872 chunkstart = start(rev)
1872 chunkstart = start(rev)
1873 if inline:
1873 if inline:
1874 chunkstart += (rev + 1) * iosize
1874 chunkstart += (rev + 1) * iosize
1875 chunklength = length(rev)
1875 chunklength = length(rev)
1876 comp_mode = self.index[rev][10]
1876 comp_mode = self.index[rev][10]
1877 c = buffer(data, chunkstart - offset, chunklength)
1877 c = buffer(data, chunkstart - offset, chunklength)
1878 if comp_mode == COMP_MODE_PLAIN:
1878 if comp_mode == COMP_MODE_PLAIN:
1879 ladd(c)
1879 ladd(c)
1880 elif comp_mode == COMP_MODE_INLINE:
1880 elif comp_mode == COMP_MODE_INLINE:
1881 ladd(decomp(c))
1881 ladd(decomp(c))
1882 elif comp_mode == COMP_MODE_DEFAULT:
1882 elif comp_mode == COMP_MODE_DEFAULT:
1883 ladd(def_decomp(c))
1883 ladd(def_decomp(c))
1884 else:
1884 else:
1885 msg = 'unknown compression mode %d'
1885 msg = 'unknown compression mode %d'
1886 msg %= comp_mode
1886 msg %= comp_mode
1887 raise error.RevlogError(msg)
1887 raise error.RevlogError(msg)
1888
1888
1889 return l
1889 return l
1890
1890
1891 def _chunkclear(self):
1891 def _chunkclear(self):
1892 """Clear the raw chunk cache."""
1892 """Clear the raw chunk cache."""
1893 self._chunkcache = (0, b'')
1893 self._chunkcache = (0, b'')
1894
1894
def deltaparent(self, rev):
    """Return the revision that ``rev`` is stored as a delta against.

    The delta base is read from the index entry (field 3). A revision
    that is its own base has no delta parent and ``nullrev`` is returned.
    Otherwise the base is returned when ``self._generaldelta`` is set,
    and the previous revision when it is not.
    """
    base = self.index[rev][3]
    if base == rev:
        return nullrev
    return base if self._generaldelta else rev - 1
1904
1904
def issnapshot(self, rev):
    """Tell whether ``rev`` is a snapshot.

    Without ``self._sparserevlog`` a snapshot is a revision whose delta
    parent is ``nullrev``. With it, the index's own ``issnapshot`` is
    used when available; otherwise a revision is a snapshot when it is
    ``nullrev``, is its own delta base, has ``nullrev`` as delta base, or
    is a delta against a snapshot that is not one of its parents.
    """
    if not self._sparserevlog:
        return self.deltaparent(rev) == nullrev
    elif util.safehasattr(self.index, b'issnapshot'):
        # directly assign the method to cache the testing and access
        self.issnapshot = self.index.issnapshot
        return self.issnapshot(rev)

    if rev == nullrev:
        return True
    entry = self.index[rev]
    base = entry[3]
    if base == rev or base == nullrev:
        # stored in full
        return True
    parents = (entry[5], entry[6])
    if base == parents[0] or base == parents[1]:
        # a delta against a parent is a regular delta
        return False
    return self.issnapshot(base)
1926
1926
def snapshotdepth(self, rev):
    """Return the number of snapshots in the delta chain before ``rev``.

    Raises ``error.ProgrammingError`` when ``rev`` is not a snapshot.
    """
    if not self.issnapshot(rev):
        # the placeholder was previously left unformatted
        raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
    return len(self._deltachain(rev)[0]) - 1
1932
1932
def revdiff(self, rev1, rev2):
    """Return or calculate a delta between two revisions.

    The delta calculated is in binary form and is intended to be written to
    revlog data directly. So this function needs raw revision data.

    When ``rev2`` is already stored as a delta against ``rev1`` the stored
    chunk is returned; otherwise the delta is computed from the raw data
    of both revisions.
    """
    storedagainstrev1 = rev1 != nullrev and self.deltaparent(rev2) == rev1
    if storedagainstrev1:
        return bytes(self._chunk(rev2))

    oldraw = self.rawdata(rev1)
    newraw = self.rawdata(rev2)
    return mdiff.textdiff(oldraw, newraw)
1943
1943
def _processflags(self, text, flags, operation, raw=False):
    """Deprecated entry point to access flag processors.

    Emits a deprecation warning, then dispatches to the specialized
    ``flagutil`` function: the raw variant when ``raw`` is set (the text
    is returned unchanged along with its result), the read variant when
    ``operation`` is ``b'read'``, and the write variant otherwise.
    """
    util.nouideprecwarn(
        b'_processflag(...) use the specialized variant',
        b'5.2',
        stacklevel=2,
    )
    if raw:
        return text, flagutil.processflagsraw(self, text, flags)
    if operation == b'read':
        return flagutil.processflagsread(self, text, flags)
    # write operation
    return flagutil.processflagswrite(self, text, flags)
1954
1954
def revision(self, nodeorrev, _df=None, raw=False):
    """Return the uncompressed revision for a node or revision number.

    _df - an existing file handle to read from. (internal-only)
    raw - an optional argument specifying if the revision data is to be
    treated as raw data when applying flag transforms. 'raw' should be set
    to True when generating changegroups or in debug commands.

    Passing ``raw=True`` is deprecated and emits a deprecation warning;
    ``rawdata()`` is the replacement.
    """
    if raw:
        util.nouideprecwarn(
            b'revlog.revision(..., raw=True) is deprecated, '
            b'use revlog.rawdata(...)',
            b'5.2',
            stacklevel=2,
        )
    text, _sidedata = self._revisiondata(nodeorrev, _df, raw=raw)
    return text
1971
1971
def sidedata(self, nodeorrev, _df=None):
    """Return a map of extra data related to the changeset but not part of
    the hash.

    ``nodeorrev`` is either a revision number or a node; a node is first
    converted to its revision number. ``_df`` is accepted but not used.

    This function currently returns a dictionary. However, a more advanced
    mapping object will likely be used in the future for more
    efficient/lazy code.
    """
    # deal with <nodeorrev> argument type
    rev = nodeorrev if isinstance(nodeorrev, int) else self.rev(nodeorrev)
    return self._sidedata(rev)
1980
1985
1981 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1986 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1982 # deal with <nodeorrev> argument type
1987 # deal with <nodeorrev> argument type
1983 if isinstance(nodeorrev, int):
1988 if isinstance(nodeorrev, int):
1984 rev = nodeorrev
1989 rev = nodeorrev
1985 node = self.node(rev)
1990 node = self.node(rev)
1986 else:
1991 else:
1987 node = nodeorrev
1992 node = nodeorrev
1988 rev = None
1993 rev = None
1989
1994
1990 # fast path the special `nullid` rev
1995 # fast path the special `nullid` rev
1991 if node == self.nullid:
1996 if node == self.nullid:
1992 return b"", {}
1997 return b"", {}
1993
1998
1994 # ``rawtext`` is the text as stored inside the revlog. Might be the
1999 # ``rawtext`` is the text as stored inside the revlog. Might be the
1995 # revision or might need to be processed to retrieve the revision.
2000 # revision or might need to be processed to retrieve the revision.
1996 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
2001 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1997
2002
1998 if self.hassidedata:
2003 if self.hassidedata:
1999 if rev is None:
2004 if rev is None:
2000 rev = self.rev(node)
2005 rev = self.rev(node)
2001 sidedata = self._sidedata(rev)
2006 sidedata = self._sidedata(rev)
2002 else:
2007 else:
2003 sidedata = {}
2008 sidedata = {}
2004
2009
2005 if raw and validated:
2010 if raw and validated:
2006 # if we don't want to process the raw text and that raw
2011 # if we don't want to process the raw text and that raw
2007 # text is cached, we can exit early.
2012 # text is cached, we can exit early.
2008 return rawtext, sidedata
2013 return rawtext, sidedata
2009 if rev is None:
2014 if rev is None:
2010 rev = self.rev(node)
2015 rev = self.rev(node)
2011 # the revlog's flag for this revision
2016 # the revlog's flag for this revision
2012 # (usually alter its state or content)
2017 # (usually alter its state or content)
2013 flags = self.flags(rev)
2018 flags = self.flags(rev)
2014
2019
2015 if validated and flags == REVIDX_DEFAULT_FLAGS:
2020 if validated and flags == REVIDX_DEFAULT_FLAGS:
2016 # no extra flags set, no flag processor runs, text = rawtext
2021 # no extra flags set, no flag processor runs, text = rawtext
2017 return rawtext, sidedata
2022 return rawtext, sidedata
2018
2023
2019 if raw:
2024 if raw:
2020 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2025 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2021 text = rawtext
2026 text = rawtext
2022 else:
2027 else:
2023 r = flagutil.processflagsread(self, rawtext, flags)
2028 r = flagutil.processflagsread(self, rawtext, flags)
2024 text, validatehash = r
2029 text, validatehash = r
2025 if validatehash:
2030 if validatehash:
2026 self.checkhash(text, node, rev=rev)
2031 self.checkhash(text, node, rev=rev)
2027 if not validated:
2032 if not validated:
2028 self._revisioncache = (node, rev, rawtext)
2033 self._revisioncache = (node, rev, rawtext)
2029
2034
2030 return text, sidedata
2035 return text, sidedata
2031
2036
def _rawtext(self, node, rev, _df=None):
    """Rebuild the raw text of a revision without validating it.

    The revision cache, shaped ``(node, rev, rawtext)``, is consulted
    first: an entry for ``node`` is returned as is, and an entry for
    another revision may serve as the base text the deltas are applied to.

    Returns a ``(rev, rawtext, validated)`` tuple. ``validated`` is True
    only when the text came straight from the cache; in that case ``rev``
    is handed back exactly as it was passed in (possibly ``None``).
    """
    cache_entry = self._revisioncache
    # revision held by the cache, usable as a stop point for the delta chain
    cached_rev = None
    if cache_entry:
        if cache_entry[0] == node:
            return (rev, cache_entry[2], True)
        cached_rev = cache_entry[1]

    if rev is None:
        rev = self.rev(node)

    chain, stopped = self._deltachain(rev, stoprev=cached_rev)
    # when the chain walk stopped on the cached revision, its text is the
    # base the remaining deltas apply to
    base = self._revisioncache[2] if stopped else None

    # Release the cached text to save memory; the caller is expected to
    # store a new entry in self._revisioncache once the text is validated.
    self._revisioncache = None

    raw_size = self.index[rev][2]
    # a target size is only derived when the recorded size is not negative
    target = 4 * raw_size if 0 <= raw_size else None

    deltas = self._chunks(chain, df=_df, targetsize=target)
    if base is None:
        # no cached base: the first chunk of the chain is the base text
        base = bytes(deltas[0])
        deltas = deltas[1:]

    rawtext = mdiff.patches(base, deltas)
    del base  # let us have a chance to free memory early
    return (rev, rawtext, False)
2074
2079
def _sidedata(self, rev):
    """Return the sidedata for a given revision number.

    The offset and size of the stored sidedata come from fields 8 and 9 of
    the index entry, and its compression mode from field 11. An empty dict
    is returned when the recorded size is zero.

    Raises ``error.RevlogError`` when the compression mode is not one of
    ``COMP_MODE_PLAIN``, ``COMP_MODE_DEFAULT`` or ``COMP_MODE_INLINE``.
    """
    index_entry = self.index[rev]
    sidedata_size = index_entry[9]
    if sidedata_size == 0:
        # nothing stored for this revision, no need to locate anything
        return {}

    sidedata_offset = index_entry[8]
    if self._inline:
        # the index entries share the file with the data: skip the
        # ``rev + 1`` entries written before this revision's payload
        sidedata_offset += self.index.entry_size * (1 + rev)

    comp_segment = self._getsegment(sidedata_offset, sidedata_size)
    # reuse the entry fetched above rather than indexing a second time
    comp = index_entry[11]
    if comp == COMP_MODE_PLAIN:
        segment = comp_segment
    elif comp == COMP_MODE_DEFAULT:
        segment = self._decompressor(comp_segment)
    elif comp == COMP_MODE_INLINE:
        segment = self.decompress(comp_segment)
    else:
        # bytes, like every other message built in this file
        msg = b'unknown compression mode %d'
        msg %= comp
        raise error.RevlogError(msg)

    return sidedatautil.deserialize_sidedata(segment)
2101
2106
def rawdata(self, nodeorrev, _df=None):
    """Return the uncompressed raw data of a node or revision number.

    ``_df`` is an already open file handle to read from (internal-only).
    """
    # ask for the raw form and keep only the first item of the result
    result = self._revisiondata(nodeorrev, _df, raw=True)
    return result[0]
2108
2113
def hash(self, text, p1, p2):
    """Return the node hash of ``text`` with parents ``p1`` and ``p2``.

    Kept as a method so that subclasses can replace the hash as needed.
    """
    node = storageutil.hashrevisionsha1(text, p1, p2)
    return node
2116
2121
def checkhash(self, text, node, p1=None, p2=None, rev=None):
    """Verify that ``node`` is the hash of ``text`` and its parents.

    When neither ``p1`` nor ``p2`` is given, the parents are looked up
    from ``node``. ``rev`` is only used to label the error message.

    Raises ``error.RevlogError`` on a mismatch. Any ``RevlogError`` raised
    during the check is turned into ``error.CensoredNodeError`` when the
    revlog is censorable and ``text`` is a censored text.

    Kept as a method so that subclasses can extend the hash mismatch
    behavior as needed.
    """
    try:
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node == self.hash(text, p1, p2):
            return

        # Drop the cached revision on hash failure. The cache only holds
        # the raw revision, so this merely costs a cache hit the next time
        # the raw data is read. The case should be rare, and teaching the
        # cache about the verification state would be extra work.
        cached = self._revisioncache
        if cached and cached[0] == node:
            self._revisioncache = None

        if rev is None:
            label = templatefilters.short(hex(node))
        else:
            label = rev
        raise error.RevlogError(
            _(b"integrity check failed on %s:%s")
            % (self.display_id, pycompat.bytestr(label))
        )
    except error.RevlogError:
        if self._censorable and storageutil.iscensoredtext(text):
            raise error.CensoredNodeError(self.display_id, node, text)
        raise
2147
2152
def _enforceinlinesize(self, tr):
    """Check if the revlog is too big for inline and convert if so.

    This should be called after revisions are added to the revlog. If the
    revlog has grown too large to be an inline revlog, it will convert it
    to use multiple index and data files.

    ``tr`` is the running transaction. It must already track the index
    file, otherwise ``error.RevlogError`` is raised. The offsets registered
    with it are those of the first revision stored at or after the offset
    the transaction recorded for the index file.
    """
    tiprev = len(self) - 1
    total_size = self.start(tiprev) + self.length(tiprev)
    if not self._inline or total_size < _maxinline:
        return

    troffset = tr.findoffset(self._indexfile)
    if troffset is None:
        raise error.RevlogError(
            _(b"%s not found in the transaction") % self._indexfile
        )
    # first revision located at or after the transaction offset; stays
    # None until it is found in the copy loop below
    trindex = None
    tr.add(self._datafile, 0)

    existing_handles = False
    if self._writinghandles is not None:
        existing_handles = True
        fp = self._writinghandles[0]
        fp.flush()
        fp.close()
        # We can't use the cached file handle after close(). So prevent
        # its usage.
        self._writinghandles = None

    new_dfh = self._datafp(b'w+')
    new_dfh.truncate(0)  # drop any potentially existing data
    try:
        # copy every revision's data out of the inline file
        with self._indexfp() as read_ifh:
            for r in self:
                new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                # Position of the entry of ``r`` in the inline file: its
                # data offset plus the ``r`` entries stored before it.
                # Only the first match is kept; later revisions match the
                # test too and must not overwrite it.
                if (
                    trindex is None
                    and troffset <= self.start(r) + r * self.index.entry_size
                ):
                    trindex = r
            new_dfh.flush()

        if trindex is None:
            # no revision at or after the offset: same fallback as before
            trindex = 0

        with self.__index_new_fp() as fp:
            self._format_flags &= ~FLAG_INLINE_DATA
            self._inline = False
            for i in self:
                e = self.index.entry_binary(i)
                if i == 0 and self._docket is None:
                    # without a docket, the header is written in front of
                    # the first entry
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    e = header + e
                fp.write(e)
            if self._docket is not None:
                self._docket.index_end = fp.tell()

            # There is a small transactional race here. If the rename of
            # the index fails, we should remove the datafile. It is more
            # important to ensure that the data file is not truncated
            # when the index is replaced as otherwise data is lost.
            tr.replace(self._datafile, self.start(trindex))

            # the temp file replace the real index when we exit the context
            # manager

        tr.replace(self._indexfile, trindex * self.index.entry_size)
        nodemaputil.setup_persistent_nodemap(tr, self)
        self._chunkclear()

        if existing_handles:
            # switched from inline to conventional reopen the index
            ifh = self.__index_write_fp()
            self._writinghandles = (ifh, new_dfh)
            # ownership moved to self._writinghandles: do not close below
            new_dfh = None
    finally:
        if new_dfh is not None:
            new_dfh.close()
2222
2227
def _nodeduplicatecallback(self, transaction, node):
    """Called on an attempt to add a node that is already stored.

    This implementation does nothing.
    """
    return None
2225
2230
@contextlib.contextmanager
def _writing(self, transaction):
    """Context manager exposing the file handles used for writing.

    Inside the context ``self._writinghandles`` holds ``(ifh, dfh)``,
    where ``dfh`` is None for an inline revlog. When handles are already
    set the context is entered without opening anything. Otherwise the
    handles are opened, the current end of each file is registered with
    ``transaction``, and everything is closed and reset on exit.

    Raises ``error.ProgrammingError`` for a ``trypending`` revlog.
    """
    if self._trypending:
        msg = b'try to write in a `trypending` revlog: %s'
        msg %= self.display_id
        raise error.ProgrammingError(msg)
    if self._writinghandles is not None:
        # handles already set: reuse them and leave them untouched on exit
        yield
        return

    ifh = dfh = None
    try:
        nrevs = len(self)
        # opening the data file.
        dsize = self.end(nrevs - 1) if nrevs else 0
        if not self._inline:
            try:
                dfh = self._datafp(b"r+")
                if self._docket is None:
                    dfh.seek(0, os.SEEK_END)
                else:
                    dfh.seek(self._docket.data_end, os.SEEK_SET)
            except IOError as inst:
                if inst.errno != errno.ENOENT:
                    raise
                # the data file does not exist yet: create it
                dfh = self._datafp(b"w+")
            transaction.add(self._datafile, dsize)

        # opening the index file.
        isize = nrevs * self.index.entry_size
        ifh = self.__index_write_fp()
        # an inline index file also holds the revision data
        index_end = dsize + isize if self._inline else isize
        transaction.add(self._indexfile, index_end)
        # exposing all file handle for writing.
        self._writinghandles = (ifh, dfh)
        yield
        if self._docket is not None:
            self._write_docket(transaction)
    finally:
        self._writinghandles = None
        if dfh is not None:
            dfh.close()
        # closing the index file last to avoid exposing referent to
        # potential unflushed data content.
        if ifh is not None:
            ifh.close()
2276
2281
def _write_docket(self, transaction):
    """Write the current docket to disk through ``transaction``.

    This exists as a method to help changelog implement its transaction
    logic.

    The same transaction logic could conceivably be used for all revlogs,
    since dockets are cheap.
    """
    docket = self._docket
    docket.write(transaction)
2285
2290
def addrevision(
    self,
    text,
    transaction,
    link,
    p1,
    p2,
    cachedelta=None,
    node=None,
    flags=REVIDX_DEFAULT_FLAGS,
    deltacomputer=None,
    sidedata=None,
):
    """Add a revision to the log and return its revision number.

    text - the revision data to add
    transaction - the transaction object used for rollback
    link - the linkrev data to add
    p1, p2 - the parent nodeids of the revision
    cachedelta - an optional precomputed delta
    node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use different hashing method (and override checkhash() in such case)
    flags - the known flags to set on the revision
    deltacomputer - an optional deltacomputer instance shared between
        multiple calls
    sidedata - optional sidedata to store with the revision; a non-empty
        value is rejected when the revlog does not support sidedata

    When the node is already stored, its existing revision number is
    returned and nothing is written.
    """
    if link == nullrev:
        raise error.RevlogError(
            _(b"attempted to add linkrev -1 to %s") % self.display_id
        )

    if sidedata is None:
        sidedata = {}
    if sidedata and not self.hassidedata:
        raise error.ProgrammingError(
            _(b"trying to add sidedata to a revlog who don't support them")
        )

    if flags and not node:
        # with flags set, the node is computed from the unprocessed text
        node = self.hash(text, p1, p2)

    rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

    # A precomputed delta no longer applies once a flag processor has
    # changed the revision data.
    if rawtext != text:
        cachedelta = None

    rawsize = len(rawtext)
    if rawsize > _maxentrysize:
        raise error.RevlogError(
            _(b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
            % (self.display_id, rawsize)
        )

    if not node:
        node = self.hash(rawtext, p1, p2)
    known_rev = self.index.get_rev(node)
    if known_rev is not None:
        return known_rev

    if validatehash:
        self.checkhash(rawtext, node, p1=p1, p2=p2)

    return self.addrawrevision(
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=cachedelta,
        deltacomputer=deltacomputer,
        sidedata=sidedata,
    )
2363
2368
def addrawrevision(
    self,
    rawtext,
    transaction,
    link,
    p1,
    p2,
    node,
    flags,
    cachedelta=None,
    deltacomputer=None,
    sidedata=None,
):
    """Add a raw revision whose flags, node and parents are known.

    Useful when reusing a revision not stored in this revlog (ex: received
    over wire, or read from an external bundle).

    The revision is added inside a ``self._writing(transaction)`` context
    and the result of ``self._addrevision`` is returned.
    """
    # note the order: ``_addrevision`` takes the node first
    positional = (node, rawtext, transaction, link, p1, p2, flags, cachedelta)
    with self._writing(transaction):
        return self._addrevision(
            *positional,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )
2394
2399
def compress(self, data):
    """Generate a possibly-compressed representation of data.

    Returns a ``(header, data)`` pair. The header is ``b'u'`` only when
    the data is stored as is and does not start with a NUL byte; in every
    other case it is empty.
    """
    if not data:
        return b'', data

    packed = self._compressor.compress(data)
    if packed:
        # The revlog compressor added the header in the returned data.
        return b'', packed

    # stored as is: data starting with a NUL byte gets no header
    header = b'' if data[0:1] == b'\0' else b'u'
    return header, data
2409
2414
def decompress(self, data):
    """Decompress a revlog chunk.

    The chunk is expected to begin with a header identifying the format
    type so it can be routed to an appropriate decompressor. Empty chunks
    and chunks starting with a NUL byte are returned unchanged.

    Raises ``error.RevlogError`` when a zlib chunk cannot be decompressed.
    """
    if not data:
        return data

    # Revlogs are read much more frequently than they are written and many
    # chunks only take microseconds to decompress, so performance is
    # important here.
    #
    # We can make a few assumptions about revlogs:
    #
    # 1) the majority of chunks will be compressed (as opposed to inline
    #    raw data).
    # 2) decompressing *any* data will likely be at least 10x slower than
    #    returning raw inline data.
    # 3) we want to prioritize common and officially supported compression
    #    engines
    #
    # It follows that we want to optimize for the "decompress compressed
    # data when encoded with common and officially supported compression
    # engines" case over "raw data" and "data encoded by less common or
    # non-official compression engines." That is why the inline lookup
    # comes first, followed by the compengines lookup.
    #
    # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
    # compressed chunks. And this matters for changelog and manifest reads.
    header = data[0:1]

    if header == b'x':
        try:
            return _zlibdecompress(data)
        except zlib.error as e:
            raise error.RevlogError(
                _(b'revlog decompress error: %s')
                % stringutil.forcebytestr(e)
            )
    # '\0' is more common than 'u' so it goes first.
    if header == b'\0':
        return data
    if header == b'u':
        return util.buffer(data, 1)

    # any other header: hand the chunk to the engine registered for it
    engine = self._get_decompressor(header)
    return engine.decompress(data)
2459
2464
2460 def _addrevision(
2465 def _addrevision(
2461 self,
2466 self,
2462 node,
2467 node,
2463 rawtext,
2468 rawtext,
2464 transaction,
2469 transaction,
2465 link,
2470 link,
2466 p1,
2471 p1,
2467 p2,
2472 p2,
2468 flags,
2473 flags,
2469 cachedelta,
2474 cachedelta,
2470 alwayscache=False,
2475 alwayscache=False,
2471 deltacomputer=None,
2476 deltacomputer=None,
2472 sidedata=None,
2477 sidedata=None,
2473 ):
2478 ):
2474 """internal function to add revisions to the log
2479 """internal function to add revisions to the log
2475
2480
2476 see addrevision for argument descriptions.
2481 see addrevision for argument descriptions.
2477
2482
2478 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2483 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2479
2484
2480 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2485 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2481 be used.
2486 be used.
2482
2487
2483 invariants:
2488 invariants:
2484 - rawtext is optional (can be None); if not set, cachedelta must be set.
2489 - rawtext is optional (can be None); if not set, cachedelta must be set.
2485 if both are set, they must correspond to each other.
2490 if both are set, they must correspond to each other.
2486 """
2491 """
2487 if node == self.nullid:
2492 if node == self.nullid:
2488 raise error.RevlogError(
2493 raise error.RevlogError(
2489 _(b"%s: attempt to add null revision") % self.display_id
2494 _(b"%s: attempt to add null revision") % self.display_id
2490 )
2495 )
2491 if (
2496 if (
2492 node == self.nodeconstants.wdirid
2497 node == self.nodeconstants.wdirid
2493 or node in self.nodeconstants.wdirfilenodeids
2498 or node in self.nodeconstants.wdirfilenodeids
2494 ):
2499 ):
2495 raise error.RevlogError(
2500 raise error.RevlogError(
2496 _(b"%s: attempt to add wdir revision") % self.display_id
2501 _(b"%s: attempt to add wdir revision") % self.display_id
2497 )
2502 )
2498 if self._writinghandles is None:
2503 if self._writinghandles is None:
2499 msg = b'adding revision outside `revlog._writing` context'
2504 msg = b'adding revision outside `revlog._writing` context'
2500 raise error.ProgrammingError(msg)
2505 raise error.ProgrammingError(msg)
2501
2506
2502 if self._inline:
2507 if self._inline:
2503 fh = self._writinghandles[0]
2508 fh = self._writinghandles[0]
2504 else:
2509 else:
2505 fh = self._writinghandles[1]
2510 fh = self._writinghandles[1]
2506
2511
2507 btext = [rawtext]
2512 btext = [rawtext]
2508
2513
2509 curr = len(self)
2514 curr = len(self)
2510 prev = curr - 1
2515 prev = curr - 1
2511
2516
2512 offset = self._get_data_offset(prev)
2517 offset = self._get_data_offset(prev)
2513
2518
2514 if self._concurrencychecker:
2519 if self._concurrencychecker:
2515 ifh, dfh = self._writinghandles
2520 ifh, dfh = self._writinghandles
2516 if self._inline:
2521 if self._inline:
2517 # offset is "as if" it were in the .d file, so we need to add on
2522 # offset is "as if" it were in the .d file, so we need to add on
2518 # the size of the entry metadata.
2523 # the size of the entry metadata.
2519 self._concurrencychecker(
2524 self._concurrencychecker(
2520 ifh, self._indexfile, offset + curr * self.index.entry_size
2525 ifh, self._indexfile, offset + curr * self.index.entry_size
2521 )
2526 )
2522 else:
2527 else:
2523 # Entries in the .i are a consistent size.
2528 # Entries in the .i are a consistent size.
2524 self._concurrencychecker(
2529 self._concurrencychecker(
2525 ifh, self._indexfile, curr * self.index.entry_size
2530 ifh, self._indexfile, curr * self.index.entry_size
2526 )
2531 )
2527 self._concurrencychecker(dfh, self._datafile, offset)
2532 self._concurrencychecker(dfh, self._datafile, offset)
2528
2533
2529 p1r, p2r = self.rev(p1), self.rev(p2)
2534 p1r, p2r = self.rev(p1), self.rev(p2)
2530
2535
2531 # full versions are inserted when the needed deltas
2536 # full versions are inserted when the needed deltas
2532 # become comparable to the uncompressed text
2537 # become comparable to the uncompressed text
2533 if rawtext is None:
2538 if rawtext is None:
2534 # need rawtext size, before changed by flag processors, which is
2539 # need rawtext size, before changed by flag processors, which is
2535 # the non-raw size. use revlog explicitly to avoid filelog's extra
2540 # the non-raw size. use revlog explicitly to avoid filelog's extra
2536 # logic that might remove metadata size.
2541 # logic that might remove metadata size.
2537 textlen = mdiff.patchedsize(
2542 textlen = mdiff.patchedsize(
2538 revlog.size(self, cachedelta[0]), cachedelta[1]
2543 revlog.size(self, cachedelta[0]), cachedelta[1]
2539 )
2544 )
2540 else:
2545 else:
2541 textlen = len(rawtext)
2546 textlen = len(rawtext)
2542
2547
2543 if deltacomputer is None:
2548 if deltacomputer is None:
2544 deltacomputer = deltautil.deltacomputer(self)
2549 deltacomputer = deltautil.deltacomputer(self)
2545
2550
2546 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2551 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2547
2552
2548 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2553 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2549
2554
2550 compression_mode = COMP_MODE_INLINE
2555 compression_mode = COMP_MODE_INLINE
2551 if self._docket is not None:
2556 if self._docket is not None:
2552 h, d = deltainfo.data
2557 h, d = deltainfo.data
2553 if not h and not d:
2558 if not h and not d:
2554 # not data to store at all... declare them uncompressed
2559 # not data to store at all... declare them uncompressed
2555 compression_mode = COMP_MODE_PLAIN
2560 compression_mode = COMP_MODE_PLAIN
2556 elif not h:
2561 elif not h:
2557 t = d[0:1]
2562 t = d[0:1]
2558 if t == b'\0':
2563 if t == b'\0':
2559 compression_mode = COMP_MODE_PLAIN
2564 compression_mode = COMP_MODE_PLAIN
2560 elif t == self._docket.default_compression_header:
2565 elif t == self._docket.default_compression_header:
2561 compression_mode = COMP_MODE_DEFAULT
2566 compression_mode = COMP_MODE_DEFAULT
2562 elif h == b'u':
2567 elif h == b'u':
2563 # we have a more efficient way to declare uncompressed
2568 # we have a more efficient way to declare uncompressed
2564 h = b''
2569 h = b''
2565 compression_mode = COMP_MODE_PLAIN
2570 compression_mode = COMP_MODE_PLAIN
2566 deltainfo = deltautil.drop_u_compression(deltainfo)
2571 deltainfo = deltautil.drop_u_compression(deltainfo)
2567
2572
2568 sidedata_compression_mode = COMP_MODE_INLINE
2573 sidedata_compression_mode = COMP_MODE_INLINE
2569 if sidedata and self.hassidedata:
2574 if sidedata and self.hassidedata:
2570 sidedata_compression_mode = COMP_MODE_PLAIN
2575 sidedata_compression_mode = COMP_MODE_PLAIN
2571 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2576 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2572 sidedata_offset = offset + deltainfo.deltalen
2577 sidedata_offset = offset + deltainfo.deltalen
2573 h, comp_sidedata = self.compress(serialized_sidedata)
2578 h, comp_sidedata = self.compress(serialized_sidedata)
2574 if (
2579 if (
2575 h != b'u'
2580 h != b'u'
2576 and comp_sidedata[0:1] != b'\0'
2581 and comp_sidedata[0:1] != b'\0'
2577 and len(comp_sidedata) < len(serialized_sidedata)
2582 and len(comp_sidedata) < len(serialized_sidedata)
2578 ):
2583 ):
2579 assert not h
2584 assert not h
2580 if (
2585 if (
2581 comp_sidedata[0:1]
2586 comp_sidedata[0:1]
2582 == self._docket.default_compression_header
2587 == self._docket.default_compression_header
2583 ):
2588 ):
2584 sidedata_compression_mode = COMP_MODE_DEFAULT
2589 sidedata_compression_mode = COMP_MODE_DEFAULT
2585 serialized_sidedata = comp_sidedata
2590 serialized_sidedata = comp_sidedata
2586 else:
2591 else:
2587 sidedata_compression_mode = COMP_MODE_INLINE
2592 sidedata_compression_mode = COMP_MODE_INLINE
2588 serialized_sidedata = comp_sidedata
2593 serialized_sidedata = comp_sidedata
2589 else:
2594 else:
2590 serialized_sidedata = b""
2595 serialized_sidedata = b""
2591 # Don't store the offset if the sidedata is empty, that way
2596 # Don't store the offset if the sidedata is empty, that way
2592 # we can easily detect empty sidedata and they will be no different
2597 # we can easily detect empty sidedata and they will be no different
2593 # than ones we manually add.
2598 # than ones we manually add.
2594 sidedata_offset = 0
2599 sidedata_offset = 0
2595
2600
2596 e = (
2601 e = (
2597 offset_type(offset, flags),
2602 offset_type(offset, flags),
2598 deltainfo.deltalen,
2603 deltainfo.deltalen,
2599 textlen,
2604 textlen,
2600 deltainfo.base,
2605 deltainfo.base,
2601 link,
2606 link,
2602 p1r,
2607 p1r,
2603 p2r,
2608 p2r,
2604 node,
2609 node,
2605 sidedata_offset,
2610 sidedata_offset,
2606 len(serialized_sidedata),
2611 len(serialized_sidedata),
2607 compression_mode,
2612 compression_mode,
2608 sidedata_compression_mode,
2613 sidedata_compression_mode,
2609 )
2614 )
2610
2615
2611 self.index.append(e)
2616 self.index.append(e)
2612 entry = self.index.entry_binary(curr)
2617 entry = self.index.entry_binary(curr)
2613 if curr == 0 and self._docket is None:
2618 if curr == 0 and self._docket is None:
2614 header = self._format_flags | self._format_version
2619 header = self._format_flags | self._format_version
2615 header = self.index.pack_header(header)
2620 header = self.index.pack_header(header)
2616 entry = header + entry
2621 entry = header + entry
2617 self._writeentry(
2622 self._writeentry(
2618 transaction,
2623 transaction,
2619 entry,
2624 entry,
2620 deltainfo.data,
2625 deltainfo.data,
2621 link,
2626 link,
2622 offset,
2627 offset,
2623 serialized_sidedata,
2628 serialized_sidedata,
2624 )
2629 )
2625
2630
2626 rawtext = btext[0]
2631 rawtext = btext[0]
2627
2632
2628 if alwayscache and rawtext is None:
2633 if alwayscache and rawtext is None:
2629 rawtext = deltacomputer.buildtext(revinfo, fh)
2634 rawtext = deltacomputer.buildtext(revinfo, fh)
2630
2635
2631 if type(rawtext) == bytes: # only accept immutable objects
2636 if type(rawtext) == bytes: # only accept immutable objects
2632 self._revisioncache = (node, curr, rawtext)
2637 self._revisioncache = (node, curr, rawtext)
2633 self._chainbasecache[curr] = deltainfo.chainbase
2638 self._chainbasecache[curr] = deltainfo.chainbase
2634 return curr
2639 return curr
2635
2640
def _get_data_offset(self, prev):
    """Return the offset at which the next revision data should be written.

    When the revlog has no docket, the answer is the end of revision
    ``prev`` (``self.end(prev)``), which is available in O(1).

    When a docket is in use, the docket's ``data_end`` is returned
    instead: sidedata can be rewritten to the end of the data file within
    a transaction, so rev `n - 1`'s sidedata may end up stored after rev
    `n`'s data and the end of the last revision is not a reliable marker.

    TODO cache this in a docket file before getting out of experimental.
    """
    docket = self._docket
    if docket is not None:
        return docket.data_end
    return self.end(prev)
2649
2654
def _writeentry(self, transaction, entry, data, link, offset, sidedata):
    """Write one index entry plus its data (and sidedata) to disk.

    ``entry`` is the packed index entry, ``data`` is indexed as
    ``data[0]`` / ``data[1]`` (two chunks written back to back),
    ``offset`` is the data offset of the new revision and ``sidedata``
    the already serialized sidedata (may be empty). ``link`` is accepted
    but not used here.

    Raises ``error.ProgrammingError`` when called outside of a
    ``revlog._writing`` context (no writing handles available).
    """
    handles = self._writinghandles
    if handles is None:
        msg = b'adding revision outside `revlog._writing` context'
        raise error.ProgrammingError(msg)
    ifh, dfh = handles

    # Files opened in a+ mode have inconsistent behavior on various
    # platforms. Windows requires that a file positioning call be made
    # when the file handle transitions between reads and writes. See
    # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
    # platforms, Python or the platform itself can be buggy. Some versions
    # of Solaris have been observed to not append at the end of the file
    # if the file was seeked to before the end. See issue4943 for more.
    #
    # We work around this issue by inserting a seek() before writing.
    # Note: This is likely not necessary on Python 3. However, because
    # the file handle is reused for reads and may be seeked there, we need
    # to be careful before changing this.
    docket = self._docket
    if docket is None:
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)
    else:
        ifh.seek(docket.index_end, os.SEEK_SET)
        if dfh:
            dfh.seek(docket.data_end, os.SEEK_SET)

    curr = len(self) - 1
    if self._inline:
        # inline revlog: entry and data are interleaved in the index file
        offset += curr * self.index.entry_size
        transaction.add(self._indexfile, offset)
        for piece in (entry, data[0], data[1]):
            ifh.write(piece)
        if sidedata:
            ifh.write(sidedata)
        self._enforceinlinesize(transaction)
    else:
        # register both files with the transaction before touching them
        transaction.add(self._datafile, offset)
        transaction.add(self._indexfile, curr * len(entry))
        if data[0]:
            dfh.write(data[0])
        dfh.write(data[1])
        if sidedata:
            dfh.write(sidedata)
        ifh.write(entry)

    # The handles and docket are read again from `self` on purpose: the
    # calls above are free to have replaced them.
    if self._docket is not None:
        self._docket.index_end = self._writinghandles[0].tell()
        self._docket.data_end = self._writinghandles[1].tell()

    nodemaputil.setup_persistent_nodemap(transaction, self)
2701
2706
def addgroup(
    self,
    deltas,
    linkmapper,
    transaction,
    alwayscache=False,
    addrevisioncb=None,
    duplicaterevisioncb=None,
):
    """Add a group of deltas to the revlog.

    ``deltas`` is an iterable of 8-tuples ``(node, p1, p2, linknode,
    deltabase, delta, flags, sidedata)``. Each ``linknode`` is turned into
    a linkrev through ``linkmapper``.

    If ``addrevisioncb`` is defined, it is called with this revlog and the
    revision number of each revision that was added. If
    ``duplicaterevisioncb`` is defined, it is called with this revlog and
    the revision number of each node that was already stored.

    Raises ``error.ProgrammingError`` on nested calls,
    ``error.LookupError`` when a parent or the delta base is unknown and
    ``error.CensoredBaseError`` when a delta against a censored base is
    not a full replacement.

    Returns True if at least one item of ``deltas`` was processed (added
    or detected as duplicate), False otherwise.
    """
    if self._adding_group:
        raise error.ProgrammingError(b'cannot nest addgroup() calls')

    self._adding_group = True
    processed_any = False
    try:
        with self._writing(transaction):
            deltacomputer = deltautil.deltacomputer(self)
            # walk the incoming deltas one by one
            for (
                node,
                p1,
                p2,
                linknode,
                deltabase,
                delta,
                flags,
                sidedata,
            ) in deltas:
                link = linkmapper(linknode)
                if not flags:
                    flags = REVIDX_DEFAULT_FLAGS

                known_rev = self.index.get_rev(node)
                if known_rev is not None:
                    # this can happen if two branches make the same change
                    self._nodeduplicatecallback(transaction, known_rev)
                    if duplicaterevisioncb:
                        duplicaterevisioncb(self, known_rev)
                    processed_any = True
                    continue

                for parent in (p1, p2):
                    if not self.index.has_node(parent):
                        raise error.LookupError(
                            parent, self.radix, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.display_id, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement
                    # in a single patch operation
                    header_size = struct.calcsize(b">lll")
                    base_size = self.rawsize(baserev)
                    payload_size = len(delta) - header_size
                    expected_header = mdiff.replacediffheader(
                        base_size, payload_size
                    )
                    if delta[:header_size] != expected_header:
                        raise error.CensoredBaseError(
                            self.display_id, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to
                # retrieve the added revision, which will require a call
                # to revision(). revision() will fast path if there is a
                # cache hit. So, we tell _addrevision() to always cache in
                # this case.
                # We're only using addgroup() in the context of
                # changegroup generation so the revision data can always
                # be handled as raw by the flagprocessor.
                rev = self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    alwayscache=alwayscache,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

                if addrevisioncb:
                    addrevisioncb(self, rev)
                processed_any = True
    finally:
        # always clear the re-entrancy guard, even on error
        self._adding_group = False
    return processed_any
2810
2815
def iscensored(self, rev):
    """Tell whether revision ``rev`` carries the censored flag.

    Always False for revlogs that are not censorable; otherwise the
    result is the revision flags masked with ``REVIDX_ISCENSORED``.
    """
    if self._censorable:
        return self.flags(rev) & REVIDX_ISCENSORED
    return False
2817
2822
def _peek_iscensored(self, baserev, delta):
    """Cheaply tell whether applying ``delta`` yields a censored revision.

    Always False for revlogs that are not censorable; otherwise the
    decision is delegated to ``storageutil.deltaiscensored``.
    """
    if self._censorable:
        return storageutil.deltaiscensored(delta, baserev, self.rawsize)
    return False
2824
2829
def getstrippoint(self, minlink):
    """Find the minimum rev that must be stripped to strip the linkrev.

    Returns a tuple containing the minimum rev and a set of all revs that
    have linkrevs that will be broken by this strip.
    """
    tiprev = len(self) - 1
    heads = self.headrevs()
    return storageutil.resolvestripinfo(
        minlink, tiprev, heads, self.linkrev, self.parentrevs
    )
2838
2843
def strip(self, minlink, transaction):
    """Truncate the revlog on the first revision with a linkrev >= minlink.

    This function is called when we're stripping revision minlink and
    its descendants from the repository.

    We have to remove all revisions with linkrev >= minlink, because
    the equivalent changelog revisions will be renumbered after the
    strip.

    So we truncate the revlog on the first of these revisions, and
    trust that the caller has saved the revisions that shouldn't be
    removed and that it'll re-add them after this truncation.
    """
    if len(self) == 0:
        return

    striprev, _brokenrevs = self.getstrippoint(minlink)
    if striprev == len(self):
        # nothing to strip
        return

    # first truncate the files on disk
    data_end = self.start(striprev)
    if self._inline:
        # index entries and data share a single file
        index_end = data_end + (striprev * self.index.entry_size)
    else:
        transaction.add(self._datafile, data_end)
        index_end = striprev * self.index.entry_size

    transaction.add(self._indexfile, index_end)
    if self._docket is not None:
        # XXX we could leverage the docket while stripping. However it is
        # not powerful enough at the time of this comment
        self._docket.index_end = index_end
        self._docket.data_end = data_end
        self._docket.write(transaction, stripping=True)

    # then reset internal state in memory to forget those revisions
    self._revisioncache = None
    self._chaininfocache = util.lrucachedict(500)
    self._chunkclear()

    del self.index[striprev:-1]
2882
2887
def checksize(self):
    """Check size of index and data files

    return a (dd, di) tuple.
    - dd: extra bytes for the "data" file
    - di: extra bytes for the "index" file

    A healthy revlog will return (0, 0).

    A missing file (``ENOENT``) counts as 0 extra bytes; any other
    ``IOError`` is propagated to the caller.
    """
    expected = 0
    if len(self):
        expected = max(0, self.end(len(self) - 1))

    try:
        with self._datafp() as f:
            f.seek(0, io.SEEK_END)
            actual = f.tell()
        dd = actual - expected
    except IOError as inst:
        if inst.errno != errno.ENOENT:
            raise
        dd = 0

    try:
        f = self.opener(self._indexfile)
        try:
            f.seek(0, io.SEEK_END)
            actual = f.tell()
        finally:
            # release the handle even if seek()/tell() fails
            f.close()
        s = self.index.entry_size
        i = max(0, actual // s)
        di = actual - (i * s)
        if self._inline:
            # inline revlogs keep the data in the index file: there is no
            # separate data file to check, and the revision data must be
            # discounted from the index file size.
            databytes = 0
            for r in self:
                databytes += max(0, self.length(r))
            dd = 0
            di = actual - len(self) * s - databytes
    except IOError as inst:
        if inst.errno != errno.ENOENT:
            raise
        di = 0

    return (dd, di)
2926
2931
def files(self):
    """Return the list of files backing this revlog.

    The index file always comes first; the data file is only listed for
    revlogs that are not inline.
    """
    if self._inline:
        return [self._indexfile]
    return [self._indexfile, self._datafile]
2932
2937
def emitrevisions(
    self,
    nodes,
    nodesorder=None,
    revisiondata=False,
    assumehaveparentrevisions=False,
    deltamode=repository.CG_DELTAMODE_STD,
    sidedata_helpers=None,
):
    """Emit revision deltas for ``nodes`` via ``storageutil.emitrevisions``.

    ``nodesorder`` must be one of ``b'nodes'``, ``b'storage'``,
    ``b'linear'`` or None, otherwise ``error.ProgrammingError`` is raised.
    A None order becomes ``b'storage'`` for revlogs without generaldelta.

    When the revlog does not store delta chains, any ``deltamode`` other
    than ``CG_DELTAMODE_PREV`` is replaced by ``CG_DELTAMODE_FULL``.
    """
    valid_orders = (b'nodes', b'storage', b'linear', None)
    if nodesorder not in valid_orders:
        raise error.ProgrammingError(
            b'unhandled value for nodesorder: %s' % nodesorder
        )

    if nodesorder is None and not self._generaldelta:
        nodesorder = b'storage'

    if not (
        self._storedeltachains
        or deltamode == repository.CG_DELTAMODE_PREV
    ):
        deltamode = repository.CG_DELTAMODE_FULL

    options = {
        'deltaparentfn': self.deltaparent,
        'candeltafn': self.candelta,
        'rawsizefn': self.rawsize,
        'revdifffn': self.revdiff,
        'flagsfn': self.flags,
        'deltamode': deltamode,
        'revisiondata': revisiondata,
        'assumehaveparentrevisions': assumehaveparentrevisions,
        'sidedata_helpers': sidedata_helpers,
    }
    return storageutil.emitrevisions(
        self, nodes, nodesorder, revlogrevisiondelta, **options
    )
2971
2976
# Delta reuse policies understood by `clone` (see its docstring).
DELTAREUSEALWAYS = b'always'
DELTAREUSESAMEREVS = b'samerevs'
DELTAREUSENEVER = b'never'
DELTAREUSEFULLADD = b'fulladd'

# Every valid value for the `deltareuse` argument of `clone`.
DELTAREUSEALL = {
    DELTAREUSEALWAYS,
    DELTAREUSESAMEREVS,
    DELTAREUSENEVER,
    DELTAREUSEFULLADD,
}
2979
2984
def clone(
    self,
    tr,
    destrevlog,
    addrevisioncb=None,
    deltareuse=DELTAREUSESAMEREVS,
    forcedeltabothparents=None,
    sidedata_helpers=None,
):
    """Copy this revlog to another, possibly with format changes.

    The destination revlog will contain the same revisions and nodes.
    However, it may not be bit-for-bit identical due to e.g. delta encoding
    differences.

    The ``deltareuse`` argument controls how deltas from the existing
    revlog are preserved in the destination revlog. The argument can have
    the following values:

    DELTAREUSEALWAYS
       Deltas will always be reused (if possible), even if the destination
       revlog would not select the same revisions for the delta. This is
       the fastest mode of operation.
    DELTAREUSESAMEREVS
       Deltas will be reused if the destination revlog would pick the same
       revisions for the delta. This mode strikes a balance between speed
       and optimization.
    DELTAREUSENEVER
       Deltas will never be reused. This is the slowest mode of execution.
       This mode can be used to recompute deltas (e.g. if the diff/delta
       algorithm changes).
    DELTAREUSEFULLADD
       Revisions will be re-added as if they were new content. This is
       slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
       e.g. large file detection and handling.

    Delta computation can be slow, so the choice of delta reuse policy can
    significantly affect run time.

    The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
    two extremes. Deltas will be reused if they are appropriate. But if the
    delta could choose a better revision, it will do so. This means if you
    are converting a non-generaldelta revlog to a generaldelta revlog,
    deltas will be recomputed if the delta's parent isn't a parent of the
    revision.

    In addition to the delta policy, the ``forcedeltabothparents``
    argument controls whether to force compute deltas against both parents
    for merges. Any falsy value keeps the destination's current setting.

    See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
    `sidedata_helpers`.

    Raises ``ValueError`` for an unknown ``deltareuse`` value, a non-empty
    destination, or filtered revisions on either revlog.
    """
    if deltareuse not in self.DELTAREUSEALL:
        raise ValueError(
            _(b'value for deltareuse invalid: %s') % deltareuse
        )

    if len(destrevlog):
        raise ValueError(_(b'destination revlog is not empty'))

    if getattr(self, 'filteredrevs', None):
        raise ValueError(_(b'source revlog has filtered revisions'))
    if getattr(destrevlog, 'filteredrevs', None):
        raise ValueError(_(b'destination revlog has filtered revisions'))

    # lazydelta and lazydeltabase controls whether to reuse a cached delta,
    # if possible. Remember the destination's settings to restore them
    # once the copy is over.
    saved_lazydelta = destrevlog._lazydelta
    saved_lazydeltabase = destrevlog._lazydeltabase
    saved_bothparents = destrevlog._deltabothparents

    # policy -> (lazydeltabase, lazydelta); DELTAREUSEFULLADD is absent on
    # purpose and leaves both settings untouched.
    lazy_policy = {
        self.DELTAREUSEALWAYS: (True, True),
        self.DELTAREUSESAMEREVS: (False, True),
        self.DELTAREUSENEVER: (False, False),
    }

    try:
        if deltareuse in lazy_policy:
            (
                destrevlog._lazydeltabase,
                destrevlog._lazydelta,
            ) = lazy_policy[deltareuse]

        destrevlog._deltabothparents = (
            forcedeltabothparents or saved_bothparents
        )

        self._clone(
            tr,
            destrevlog,
            addrevisioncb,
            deltareuse,
            forcedeltabothparents,
            sidedata_helpers,
        )

    finally:
        destrevlog._lazydelta = saved_lazydelta
        destrevlog._lazydeltabase = saved_lazydeltabase
        destrevlog._deltabothparents = saved_bothparents
3078
3083
def _clone(
    self,
    tr,
    destrevlog,
    addrevisioncb,
    deltareuse,
    forcedeltabothparents,
    sidedata_helpers,
):
    """Perform the core duty of `revlog.clone` after parameter processing.

    Every revision of this revlog is read through its raw index entry and
    appended to ``destrevlog`` within transaction ``tr``.

    * With ``deltareuse == self.DELTAREUSEFULLADD`` the revision text is
      fetched and handed to ``destrevlog.addrevision``.
    * Otherwise the stored chunk is offered as a cached delta when
      ``destrevlog._lazydelta`` is set and the revision has a delta
      parent; failing that, the revision data is read. The revision is
      then added through ``destrevlog._addrevision``.

    When ``sidedata_helpers`` is not None, the helpers are run on each
    revision's sidedata and the flag changes they request are applied.

    ``addrevisioncb``, when truthy, is called as
    ``addrevisioncb(self, rev, node)`` after each revision is added.

    ``forcedeltabothparents`` is accepted for signature compatibility; it
    is not consulted in this method.
    """
    deltacomputer = deltautil.deltacomputer(destrevlog)
    index = self.index

    def run_helpers(sidedata, flags, rev):
        # Run the sidedata helpers (if any) and fold their flag changes in.
        if sidedata_helpers is None:
            return sidedata, flags
        sidedata, new_flags = sidedatautil.run_sidedata_helpers(
            self, sidedata_helpers, sidedata, rev
        )
        # new_flags is used as a (flags to add, flags to remove) pair.
        # `&` binds tighter than `|`, so the parentheses are required for
        # the removal mask to also clear flags that were already set.
        return sidedata, (flags | new_flags[0]) & ~new_flags[1]

    for rev in self:
        entry = index[rev]

        # Some classes override linkrev to take filtered revs into
        # account. Use raw entry from index.
        flags = entry[0] & 0xFFFF
        linkrev = entry[4]
        p1 = index[entry[5]][7]
        p2 = index[entry[6]][7]
        node = entry[7]

        # (Possibly) reuse the delta from the revlog if allowed and
        # the revlog chunk is a delta.
        cachedelta = None
        rawtext = None
        if deltareuse == self.DELTAREUSEFULLADD:
            text, sidedata = self._revisiondata(rev)
            sidedata, flags = run_helpers(sidedata, flags, rev)

            destrevlog.addrevision(
                text,
                tr,
                linkrev,
                p1,
                p2,
                cachedelta=cachedelta,
                node=node,
                flags=flags,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )
        else:
            if destrevlog._lazydelta:
                dp = self.deltaparent(rev)
                if dp != nullrev:
                    cachedelta = (dp, bytes(self._chunk(rev)))

            sidedata = None
            if not cachedelta:
                rawtext, sidedata = self._revisiondata(rev)
            if sidedata is None:
                # Either the delta is reused (nothing was read above) or
                # the read returned no sidedata: fetch it explicitly.
                sidedata = self.sidedata(rev)

            sidedata, flags = run_helpers(sidedata, flags, rev)

            with destrevlog._writing(tr):
                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

        if addrevisioncb:
            addrevisioncb(self, rev, node)
3161
3166
def censorrevision(self, tr, censornode, tombstone=b''):
    """Replace the content of ``censornode`` with a censorship tombstone.

    A temporary revlog (postfix ``tmpcensored``) is filled with a copy of
    every revision, the censored one being substituted by the packed
    tombstone. The temporary files are then renamed over this revlog's
    files (after registering backups with ``tr``) and the index is
    reloaded.

    Raises ``error.RevlogError`` for version 0 revlogs and ``error.Abort``
    when the tombstone is longer than the censored data or when a
    censored revision is stored as a delta.
    """
    if self._format_version == REVLOGV0:
        raise error.RevlogError(
            _(b'cannot censor with version %d revlogs')
            % self._format_version
        )

    target_rev = self.rev(censornode)
    packed_tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    if len(packed_tombstone) > self.rawsize(target_rev):
        raise error.Abort(
            _(b'censor tombstone must be no longer than censored data')
        )

    # Rewriting the revlog in place is hard, so everything is copied into
    # a fresh revlog that later replaces the current files.
    #
    # This is a bit dangerous. We could easily have a mismatch of state.
    replacement = revlog(
        self.opener,
        target=self.target,
        radix=self.radix,
        postfix=b'tmpcensored',
        censorable=True,
    )
    replacement._format_version = self._format_version
    replacement._format_flags = self._format_flags
    replacement._generaldelta = self._generaldelta
    replacement._parse_index = self._parse_index

    for rev in self.revs():
        node = self.node(rev)
        p1, p2 = self.parents(node)

        if rev == target_rev:
            # The revision being censored: store the tombstone instead.
            replacement.addrawrevision(
                packed_tombstone,
                tr,
                self.linkrev(target_rev),
                p1,
                p2,
                censornode,
                REVIDX_ISCENSORED,
            )

            if replacement.deltaparent(rev) != nullrev:
                raise error.Abort(
                    _(
                        b'censored revision stored as delta; '
                        b'cannot censor'
                    ),
                    hint=_(
                        b'censoring of revlogs is not '
                        b'fully implemented; please report '
                        b'this bug'
                    ),
                )
        else:
            if self.iscensored(rev):
                # Previously censored revision: copy its stored chunk.
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                payload = self._chunk(rev)
            else:
                payload = self.rawdata(rev)

            replacement.addrawrevision(
                payload, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

    # Register backups of the current files before they are overwritten.
    tr.addbackup(self._indexfile, location=b'store')
    if not self._inline:
        tr.addbackup(self._datafile, location=b'store')

    # Move the rewritten files into place.
    self.opener.rename(replacement._indexfile, self._indexfile)
    if not self._inline:
        self.opener.rename(replacement._datafile, self._datafile)

    # Drop cached state and reload the index from the new files.
    self.clearcaches()
    self._loadindex()
3249
3254
def verifyintegrity(self, state):
    """Check this revlog for inconsistencies.

    This is a generator producing one ``revlogproblem`` per issue found.

    ``state`` is a dict shared with the verifier. It is read for
    ``b'expectedversion'``, ``b'erroroncensored'`` and the optional
    ``b'skipflags'``; ``b'skipread'`` and ``b'safe_renamed'`` are reset
    to empty sets, and nodes whose content could not be checked are
    added to ``b'skipread'``.
    """
    data_excess, index_excess = self.checksize()
    if data_excess:
        yield revlogproblem(
            error=_(b'data length off by %d bytes') % data_excess
        )
    if index_excess:
        yield revlogproblem(
            error=_(b'index contains %d extra bytes') % index_excess
        )

    # The verifier tells us what version revlog we should be.
    actual_version = self._format_version
    if actual_version != state[b'expectedversion']:
        yield revlogproblem(
            warning=_(b"warning: '%s' uses revlog format %d; expected %d")
            % (self.display_id, actual_version, state[b'expectedversion'])
        )

    state[b'skipread'] = set()
    state[b'safe_renamed'] = set()

    for rev in self:
        node = self.node(rev)

        # Content verification has 4 cases to consider:
        #
        #   common: the most common case
        #   rename: with a rename
        #   meta: file content starts with b'\1\n', the metadata
        #         header defined in filelog.py, but without a rename
        #   ext: content stored externally
        #
        # How they differ:
        #
        #                       | common | rename | meta  | ext
        #  -------------------------------------------------------
        #   flags()             | 0      | 0      | 0     | not 0
        #   renamed()           | False  | True   | False | ?
        #   rawtext[0:2]=='\1\n'| False  | True   | True  | ?
        #
        # "rawtext" is the raw text stored in revlog data, as returned
        # by "rawdata(rev)". "text" below refers to "revision(rev)".
        #
        # Three lengths are stored physically:
        #  1. L1: rawsize, stored in revlog index
        #  2. L2: len(rawtext), stored in revlog data
        #  3. L3: len(text), stored in revlog data if flags==0, or
        #     possibly somewhere else if flags!=0
        #
        # L1 should be equal to L2. L3 could be different from them.
        # "text" may or may not affect commit hash depending on flag
        # processors (see flagutil.addflagprocessor).
        #
        #              | common  | rename | meta  | ext
        # -------------------------------------------------
        # rawsize()    | L1      | L1     | L1    | L1
        # size()       | L1      | L2-LM  | L1(*) | L1 (?)
        # len(rawtext) | L2      | L2     | L2    | L2
        # len(text)    | L2      | L2     | L2    | L3
        # len(read())  | L2      | L2-LM  | L2-LM | L3 (?)
        #
        # LM:  length of metadata, depending on rawtext
        # (*): not ideal, see comment in filelog.size
        # (?): could be "- len(meta)" if the resolved content has
        #      rename metadata
        #
        # Checks performed:
        #  1. length check: L1 == L2, in all cases.
        #  2. hash check: depending on flag processor, we may need to
        #     use either "text" (external), or "rawtext" (in revlog).

        try:
            # Keep only the requested skip flags that this revision has.
            requested = state.get(b'skipflags', 0)
            skipflags = (
                requested & self.flags(rev) if requested else requested
            )

            _verify_revision(self, skipflags, state, node)

            indexed_size = self.rawsize(rev)
            actual_size = len(self.rawdata(node))

            if indexed_size != actual_size:
                yield revlogproblem(
                    error=_(b'unpacked size is %d, %d expected')
                    % (actual_size, indexed_size),
                    node=node,
                )

        except error.CensoredNodeError:
            # Censored data is only a problem when the verifier says so.
            if state[b'erroroncensored']:
                yield revlogproblem(
                    error=_(b'censored file data'), node=node
                )
                state[b'skipread'].add(node)
        except Exception as exc:
            # Any other failure is reported rather than aborting the
            # whole verification.
            yield revlogproblem(
                error=_(b'unpacking %s: %s')
                % (short(node), stringutil.forcebytestr(exc)),
                node=node,
            )
            state[b'skipread'].add(node)
3354
3359
def storageinfo(
    self,
    exclusivefiles=False,
    sharedfiles=False,
    revisionscount=False,
    trackedsize=False,
    storedsize=False,
):
    """Return a dict of storage facts about this revlog.

    Each truthy argument adds the bytes key of the same name:

    * ``exclusivefiles``: list of ``(opener, path)`` pairs for the index
      file, plus the data file when the revlog is not inline
    * ``sharedfiles``: always an empty list
    * ``revisionscount``: ``len(self)``
    * ``trackedsize``: sum of ``rawsize`` over all revisions
    * ``storedsize``: sum of the on-disk sizes of ``self.files()``
    """
    info = {}

    if exclusivefiles:
        owned = [(self.opener, self._indexfile)]
        info[b'exclusivefiles'] = owned
        if not self._inline:
            owned.append((self.opener, self._datafile))

    if sharedfiles:
        info[b'sharedfiles'] = []

    if revisionscount:
        info[b'revisionscount'] = len(self)

    if trackedsize:
        info[b'trackedsize'] = sum(self.rawsize(rev) for rev in self)

    if storedsize:
        on_disk = 0
        for path in self.files():
            on_disk += self.opener.stat(path).st_size
        info[b'storedsize'] = on_disk

    return info
3385
3390
def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
    """Compute and store sidedata for revisions ``startrev..endrev``.

    Both bounds are inclusive. For each revision the sidedata helpers are
    run starting from an empty sidedata dict, the serialized result is
    appended to the data file, and the matching index entries are then
    rewritten in place.

    Nothing is done when the revlog has no sidedata support, or when both
    ``helpers[1]`` and ``helpers[2]`` are empty.

    Raises ``error.Abort`` if a revision in the range already has
    sidedata recorded in its index entry.
    """
    if not self.hassidedata:
        return
    # revlog formats with sidedata support does not support inline
    assert not self._inline
    if not helpers[1] and not helpers[2]:
        # Nothing to generate or remove
        return

    new_entries = []
    # append the new sidedata
    with self._writing(transaction):
        ifh, dfh = self._writinghandles
        if self._docket is not None:
            dfh.seek(self._docket.data_end, os.SEEK_SET)
        else:
            dfh.seek(0, os.SEEK_END)

        current_offset = dfh.tell()
        for rev in range(startrev, endrev + 1):
            entry = self.index[rev]
            new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                store=self,
                sidedata_helpers=helpers,
                sidedata={},
                rev=rev,
            )

            serialized_sidedata = sidedatautil.serialize_sidedata(
                new_sidedata
            )

            sidedata_compression_mode = COMP_MODE_INLINE
            if serialized_sidedata and self.hassidedata:
                sidedata_compression_mode = COMP_MODE_PLAIN
                h, comp_sidedata = self.compress(serialized_sidedata)
                # Use one-byte slices rather than indexing: indexing a
                # bytes object yields an int on Python 3, which never
                # compares equal to a bytes value such as b'\0'.
                first_byte = comp_sidedata[0:1]
                if (
                    h != b'u'
                    and first_byte != b'\0'
                    and len(comp_sidedata) < len(serialized_sidedata)
                ):
                    assert not h
                    if first_byte == self._docket.default_compression_header:
                        sidedata_compression_mode = COMP_MODE_DEFAULT
                    else:
                        sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
            if entry[8] != 0 or entry[9] != 0:
                # rewriting entries that already have sidedata is not
                # supported yet, because it introduces garbage data in the
                # revlog.
                msg = b"rewriting existing sidedata is not supported yet"
                raise error.Abort(msg)

            # Apply (potential) flags to add and to remove after running
            # the sidedata helpers. `&` binds tighter than `|`, so the
            # parentheses are needed for the removal mask to also clear
            # flags already present in the entry.
            new_offset_flags = (entry[0] | flags[0]) & ~flags[1]
            entry_update = (
                current_offset,
                len(serialized_sidedata),
                new_offset_flags,
                sidedata_compression_mode,
            )

            # the sidedata computation might have moved the file cursors
            # around
            dfh.seek(current_offset, os.SEEK_SET)
            dfh.write(serialized_sidedata)
            new_entries.append(entry_update)
            current_offset += len(serialized_sidedata)
            if self._docket is not None:
                self._docket.data_end = dfh.tell()

        # rewrite the new index entries
        ifh.seek(startrev * self.index.entry_size)
        for rev, update in enumerate(new_entries, startrev):
            self.index.replace_sidedata_info(rev, *update)
            packed = self.index.entry_binary(rev)
            if rev == 0 and self._docket is None:
                # Without a docket, the first entry is written together
                # with the packed format header.
                header = self._format_flags | self._format_version
                header = self.index.pack_header(header)
                packed = header + packed
            ifh.write(packed)
General Comments 0
You need to be logged in to leave comments. Login now