##// END OF EJS Templates
revlog: close the index file handle after the data one...
marmoute -
r48119:27e9ed12 default
parent child Browse files
Show More
@@ -1,3470 +1,3472 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import contextlib
18 import contextlib
19 import errno
19 import errno
20 import io
20 import io
21 import os
21 import os
22 import struct
22 import struct
23 import zlib
23 import zlib
24
24
25 # import stuff from node for others to import from revlog
25 # import stuff from node for others to import from revlog
26 from .node import (
26 from .node import (
27 bin,
27 bin,
28 hex,
28 hex,
29 nullrev,
29 nullrev,
30 sha1nodeconstants,
30 sha1nodeconstants,
31 short,
31 short,
32 wdirrev,
32 wdirrev,
33 )
33 )
34 from .i18n import _
34 from .i18n import _
35 from .pycompat import getattr
35 from .pycompat import getattr
36 from .revlogutils.constants import (
36 from .revlogutils.constants import (
37 ALL_KINDS,
37 ALL_KINDS,
38 CHANGELOGV2,
38 CHANGELOGV2,
39 COMP_MODE_DEFAULT,
39 COMP_MODE_DEFAULT,
40 COMP_MODE_INLINE,
40 COMP_MODE_INLINE,
41 COMP_MODE_PLAIN,
41 COMP_MODE_PLAIN,
42 FEATURES_BY_VERSION,
42 FEATURES_BY_VERSION,
43 FLAG_GENERALDELTA,
43 FLAG_GENERALDELTA,
44 FLAG_INLINE_DATA,
44 FLAG_INLINE_DATA,
45 INDEX_HEADER,
45 INDEX_HEADER,
46 KIND_CHANGELOG,
46 KIND_CHANGELOG,
47 REVLOGV0,
47 REVLOGV0,
48 REVLOGV1,
48 REVLOGV1,
49 REVLOGV1_FLAGS,
49 REVLOGV1_FLAGS,
50 REVLOGV2,
50 REVLOGV2,
51 REVLOGV2_FLAGS,
51 REVLOGV2_FLAGS,
52 REVLOG_DEFAULT_FLAGS,
52 REVLOG_DEFAULT_FLAGS,
53 REVLOG_DEFAULT_FORMAT,
53 REVLOG_DEFAULT_FORMAT,
54 REVLOG_DEFAULT_VERSION,
54 REVLOG_DEFAULT_VERSION,
55 SUPPORTED_FLAGS,
55 SUPPORTED_FLAGS,
56 )
56 )
57 from .revlogutils.flagutil import (
57 from .revlogutils.flagutil import (
58 REVIDX_DEFAULT_FLAGS,
58 REVIDX_DEFAULT_FLAGS,
59 REVIDX_ELLIPSIS,
59 REVIDX_ELLIPSIS,
60 REVIDX_EXTSTORED,
60 REVIDX_EXTSTORED,
61 REVIDX_FLAGS_ORDER,
61 REVIDX_FLAGS_ORDER,
62 REVIDX_HASCOPIESINFO,
62 REVIDX_HASCOPIESINFO,
63 REVIDX_ISCENSORED,
63 REVIDX_ISCENSORED,
64 REVIDX_RAWTEXT_CHANGING_FLAGS,
64 REVIDX_RAWTEXT_CHANGING_FLAGS,
65 )
65 )
66 from .thirdparty import attr
66 from .thirdparty import attr
67 from . import (
67 from . import (
68 ancestor,
68 ancestor,
69 dagop,
69 dagop,
70 error,
70 error,
71 mdiff,
71 mdiff,
72 policy,
72 policy,
73 pycompat,
73 pycompat,
74 templatefilters,
74 templatefilters,
75 util,
75 util,
76 )
76 )
77 from .interfaces import (
77 from .interfaces import (
78 repository,
78 repository,
79 util as interfaceutil,
79 util as interfaceutil,
80 )
80 )
81 from .revlogutils import (
81 from .revlogutils import (
82 deltas as deltautil,
82 deltas as deltautil,
83 docket as docketutil,
83 docket as docketutil,
84 flagutil,
84 flagutil,
85 nodemap as nodemaputil,
85 nodemap as nodemaputil,
86 revlogv0,
86 revlogv0,
87 sidedata as sidedatautil,
87 sidedata as sidedatautil,
88 )
88 )
89 from .utils import (
89 from .utils import (
90 storageutil,
90 storageutil,
91 stringutil,
91 stringutil,
92 )
92 )
93
93
# Blanked usage of all the names below to placate pyflakes: extensions
# expect to be able to import these symbols from this module, so they
# must stay bound here even though this file never uses them directly.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS
114
114
# Resolve the best available implementations (C, Rust, or pure Python).
parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576
126
126
# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    """Read-time flag processor for ellipsis revisions.

    Ellipsis revisions store their text verbatim, so reading needs no
    transformation: return the text unchanged and ``False`` to indicate
    the raw text should not be validated against the stored hash.
    """
    return text, False
130
130
131
131
def ellipsiswriteprocessor(rl, text):
    """Write-time flag processor for ellipsis revisions.

    Storage is verbatim: hand back the text untouched, with ``False``
    signalling that no raw-text validation is required.
    """
    return text, False
134
134
135
135
def ellipsisrawprocessor(rl, text):
    """Raw-text flag processor for ellipsis revisions.

    Always returns ``False``: the raw text of an ellipsis revision must
    not be checked against its recorded hash.
    """
    return False
138
138
139
139
# Bundle the ellipsis flag processors in the (read, write, raw) order
# that the flag-processor machinery expects.
ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)
145
145
146
146
def offset_type(offset, type):
    """Pack a data ``offset`` and a 16-bit flag field ``type`` into one int.

    The offset occupies the high bits (shifted left by 16) and the flag
    field the low 16 bits.  Raises ``ValueError`` when ``type`` carries
    any bit outside ``flagutil.REVIDX_KNOWN_FLAGS``.
    """
    unknown_flags = type & ~flagutil.REVIDX_KNOWN_FLAGS
    if unknown_flags != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)
151
151
152
152
153 def _verify_revision(rl, skipflags, state, node):
153 def _verify_revision(rl, skipflags, state, node):
154 """Verify the integrity of the given revlog ``node`` while providing a hook
154 """Verify the integrity of the given revlog ``node`` while providing a hook
155 point for extensions to influence the operation."""
155 point for extensions to influence the operation."""
156 if skipflags:
156 if skipflags:
157 state[b'skipread'].add(node)
157 state[b'skipread'].add(node)
158 else:
158 else:
159 # Side-effect: read content and verify hash.
159 # Side-effect: read content and verify hash.
160 rl.revision(node)
160 rl.revision(node)
161
161
162
162
# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance consideration (and a
# wheelbarrow of other slowness source)
HAS_FAST_PERSISTENT_NODEMAP = (
    rustrevlog is not None
    or util.safehasattr(parsers, 'BaseIndexObject')
)
171
171
172
172
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext.

    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    textlen: length of the built text
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()
192
192
193
193
@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    """Concrete ``irevisiondelta``: one revision expressed as a delta.

    Carries the revision's node, its parents' and delta-base's nodes,
    storage flags, and either a full ``revision`` text or a ``delta``
    against ``basenode``.  ``linknode`` defaults to None and may be
    filled in later by callers.
    """

    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)
208
208
209
209
@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    """A problem found while verifying a revlog.

    Exactly one of ``warning`` or ``error`` is normally set; ``node``
    identifies the affected revision when known.
    """

    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)
216
216
217
217
def parse_index_v1(data, inline):
    """Parse a version-1 revlog index with the C implementation.

    Returns the ``(index, cache)`` pair produced by
    ``parsers.parse_index2``.
    """
    return parsers.parse_index2(data, inline)
222
222
223
223
def parse_index_v2(data, inline):
    """Parse a version-2 revlog index with the C implementation.

    Returns the ``(index, cache)`` pair produced by
    ``parsers.parse_index2`` in revlogv2 mode.
    """
    return parsers.parse_index2(data, inline, revlogv2=True)
228
228
229
229
def parse_index_cl_v2(data, inline):
    """Parse a changelog-v2 index using the pure-Python implementation.

    Inline data is not supported by this format, so ``inline`` must be
    false.  Returns an ``(index, cache)`` pair.
    """
    assert not inline
    from .pure.parsers import parse_index_cl_v2

    return parse_index_cl_v2(data)
237
237
238
238
if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        """Parse a v1 index via the development nodemap-aware C parser."""
        return parsers.parse_index_devel_nodemap(data, inline)


else:
    # Development parser unavailable; callers must check for None.
    parse_index_v1_nodemap = None
248
248
249
249
def parse_index_v1_mixed(data, inline):
    """Parse a v1 index and wrap it in the Rust ``MixedIndex``.

    Returns the wrapped index together with the cache entry from the
    underlying v1 parse.
    """
    parsed, chunkcache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(parsed), chunkcache
253
253
254
254
255 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
255 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
256 # signed integer)
256 # signed integer)
257 _maxentrysize = 0x7FFFFFFF
257 _maxentrysize = 0x7FFFFFFF
258
258
259
259
260 class revlog(object):
260 class revlog(object):
261 """
261 """
262 the underlying revision storage object
262 the underlying revision storage object
263
263
264 A revlog consists of two parts, an index and the revision data.
264 A revlog consists of two parts, an index and the revision data.
265
265
266 The index is a file with a fixed record size containing
266 The index is a file with a fixed record size containing
267 information on each revision, including its nodeid (hash), the
267 information on each revision, including its nodeid (hash), the
268 nodeids of its parents, the position and offset of its data within
268 nodeids of its parents, the position and offset of its data within
269 the data file, and the revision it's based on. Finally, each entry
269 the data file, and the revision it's based on. Finally, each entry
270 contains a linkrev entry that can serve as a pointer to external
270 contains a linkrev entry that can serve as a pointer to external
271 data.
271 data.
272
272
273 The revision data itself is a linear collection of data chunks.
273 The revision data itself is a linear collection of data chunks.
274 Each chunk represents a revision and is usually represented as a
274 Each chunk represents a revision and is usually represented as a
275 delta against the previous chunk. To bound lookup time, runs of
275 delta against the previous chunk. To bound lookup time, runs of
276 deltas are limited to about 2 times the length of the original
276 deltas are limited to about 2 times the length of the original
277 version data. This makes retrieval of a version proportional to
277 version data. This makes retrieval of a version proportional to
278 its size, or O(1) relative to the number of revisions.
278 its size, or O(1) relative to the number of revisions.
279
279
280 Both pieces of the revlog are written to in an append-only
280 Both pieces of the revlog are written to in an append-only
281 fashion, which means we never need to rewrite a file to insert or
281 fashion, which means we never need to rewrite a file to insert or
282 remove data, and can use some simple techniques to avoid the need
282 remove data, and can use some simple techniques to avoid the need
283 for locking while reading.
283 for locking while reading.
284
284
285 If checkambig, indexfile is opened with checkambig=True at
285 If checkambig, indexfile is opened with checkambig=True at
286 writing, to avoid file stat ambiguity.
286 writing, to avoid file stat ambiguity.
287
287
288 If mmaplargeindex is True, and an mmapindexthreshold is set, the
288 If mmaplargeindex is True, and an mmapindexthreshold is set, the
289 index will be mmapped rather than read if it is larger than the
289 index will be mmapped rather than read if it is larger than the
290 configured threshold.
290 configured threshold.
291
291
292 If censorable is True, the revlog can have censored revisions.
292 If censorable is True, the revlog can have censored revisions.
293
293
294 If `upperboundcomp` is not None, this is the expected maximal gain from
294 If `upperboundcomp` is not None, this is the expected maximal gain from
295 compression for the data content.
295 compression for the data content.
296
296
297 `concurrencychecker` is an optional function that receives 3 arguments: a
297 `concurrencychecker` is an optional function that receives 3 arguments: a
298 file handle, a filename, and an expected position. It should check whether
298 file handle, a filename, and an expected position. It should check whether
299 the current position in the file handle is valid, and log/warn/fail (by
299 the current position in the file handle is valid, and log/warn/fail (by
300 raising).
300 raising).
301
301
302
302
303 Internal details
303 Internal details
304 ----------------
304 ----------------
305
305
306 A large part of the revlog logic deals with revisions' "index entries", tuple
306 A large part of the revlog logic deals with revisions' "index entries", tuple
307 objects that contains the same "items" whatever the revlog version.
307 objects that contains the same "items" whatever the revlog version.
308 Different versions will have different ways of storing these items (sometimes
308 Different versions will have different ways of storing these items (sometimes
309 not having them at all), but the tuple will always be the same. New fields
309 not having them at all), but the tuple will always be the same. New fields
310 are usually added at the end to avoid breaking existing code that relies
310 are usually added at the end to avoid breaking existing code that relies
311 on the existing order. The field are defined as follows:
311 on the existing order. The field are defined as follows:
312
312
313 [0] offset:
313 [0] offset:
314 The byte index of the start of revision data chunk.
314 The byte index of the start of revision data chunk.
315 That value is shifted up by 16 bits. use "offset = field >> 16" to
315 That value is shifted up by 16 bits. use "offset = field >> 16" to
316 retrieve it.
316 retrieve it.
317
317
318 flags:
318 flags:
319 A flag field that carries special information or changes the behavior
319 A flag field that carries special information or changes the behavior
320 of the revision. (see `REVIDX_*` constants for details)
320 of the revision. (see `REVIDX_*` constants for details)
321 The flag field only occupies the first 16 bits of this field,
321 The flag field only occupies the first 16 bits of this field,
322 use "flags = field & 0xFFFF" to retrieve the value.
322 use "flags = field & 0xFFFF" to retrieve the value.
323
323
324 [1] compressed length:
324 [1] compressed length:
325 The size, in bytes, of the chunk on disk
325 The size, in bytes, of the chunk on disk
326
326
327 [2] uncompressed length:
327 [2] uncompressed length:
328 The size, in bytes, of the full revision once reconstructed.
328 The size, in bytes, of the full revision once reconstructed.
329
329
330 [3] base rev:
330 [3] base rev:
331 Either the base of the revision delta chain (without general
331 Either the base of the revision delta chain (without general
332 delta), or the base of the delta (stored in the data chunk)
332 delta), or the base of the delta (stored in the data chunk)
333 with general delta.
333 with general delta.
334
334
335 [4] link rev:
335 [4] link rev:
336 Changelog revision number of the changeset introducing this
336 Changelog revision number of the changeset introducing this
337 revision.
337 revision.
338
338
339 [5] parent 1 rev:
339 [5] parent 1 rev:
340 Revision number of the first parent
340 Revision number of the first parent
341
341
342 [6] parent 2 rev:
342 [6] parent 2 rev:
343 Revision number of the second parent
343 Revision number of the second parent
344
344
345 [7] node id:
345 [7] node id:
346 The node id of the current revision
346 The node id of the current revision
347
347
348 [8] sidedata offset:
348 [8] sidedata offset:
349 The byte index of the start of the revision's side-data chunk.
349 The byte index of the start of the revision's side-data chunk.
350
350
351 [9] sidedata chunk length:
351 [9] sidedata chunk length:
352 The size, in bytes, of the revision's side-data chunk.
352 The size, in bytes, of the revision's side-data chunk.
353
353
354 [10] data compression mode:
354 [10] data compression mode:
355 two bits that detail the way the data chunk is compressed on disk.
355 two bits that detail the way the data chunk is compressed on disk.
356 (see "COMP_MODE_*" constants for details). For revlog version 0 and
356 (see "COMP_MODE_*" constants for details). For revlog version 0 and
357 1 this will always be COMP_MODE_INLINE.
357 1 this will always be COMP_MODE_INLINE.
358
358
359 [11] side-data compression mode:
359 [11] side-data compression mode:
360 two bits that detail the way the sidedata chunk is compressed on disk.
360 two bits that detail the way the sidedata chunk is compressed on disk.
361 (see "COMP_MODE_*" constants for details)
361 (see "COMP_MODE_*" constants for details)
362 """
362 """
363
363
364 _flagserrorclass = error.RevlogError
364 _flagserrorclass = error.RevlogError
365
365
366 def __init__(
366 def __init__(
367 self,
367 self,
368 opener,
368 opener,
369 target,
369 target,
370 radix,
370 radix,
371 postfix=None, # only exist for `tmpcensored` now
371 postfix=None, # only exist for `tmpcensored` now
372 checkambig=False,
372 checkambig=False,
373 mmaplargeindex=False,
373 mmaplargeindex=False,
374 censorable=False,
374 censorable=False,
375 upperboundcomp=None,
375 upperboundcomp=None,
376 persistentnodemap=False,
376 persistentnodemap=False,
377 concurrencychecker=None,
377 concurrencychecker=None,
378 trypending=False,
378 trypending=False,
379 ):
379 ):
380 """
380 """
381 create a revlog object
381 create a revlog object
382
382
383 opener is a function that abstracts the file opening operation
383 opener is a function that abstracts the file opening operation
384 and can be used to implement COW semantics or the like.
384 and can be used to implement COW semantics or the like.
385
385
386 `target`: a (KIND, ID) tuple that identify the content stored in
386 `target`: a (KIND, ID) tuple that identify the content stored in
387 this revlog. It help the rest of the code to understand what the revlog
387 this revlog. It help the rest of the code to understand what the revlog
388 is about without having to resort to heuristic and index filename
388 is about without having to resort to heuristic and index filename
389 analysis. Note: that this must be reliably be set by normal code, but
389 analysis. Note: that this must be reliably be set by normal code, but
390 that test, debug, or performance measurement code might not set this to
390 that test, debug, or performance measurement code might not set this to
391 accurate value.
391 accurate value.
392 """
392 """
393 self.upperboundcomp = upperboundcomp
393 self.upperboundcomp = upperboundcomp
394
394
395 self.radix = radix
395 self.radix = radix
396
396
397 self._docket_file = None
397 self._docket_file = None
398 self._indexfile = None
398 self._indexfile = None
399 self._datafile = None
399 self._datafile = None
400 self._nodemap_file = None
400 self._nodemap_file = None
401 self.postfix = postfix
401 self.postfix = postfix
402 self._trypending = trypending
402 self._trypending = trypending
403 self.opener = opener
403 self.opener = opener
404 if persistentnodemap:
404 if persistentnodemap:
405 self._nodemap_file = nodemaputil.get_nodemap_file(self)
405 self._nodemap_file = nodemaputil.get_nodemap_file(self)
406
406
407 assert target[0] in ALL_KINDS
407 assert target[0] in ALL_KINDS
408 assert len(target) == 2
408 assert len(target) == 2
409 self.target = target
409 self.target = target
410 # When True, indexfile is opened with checkambig=True at writing, to
410 # When True, indexfile is opened with checkambig=True at writing, to
411 # avoid file stat ambiguity.
411 # avoid file stat ambiguity.
412 self._checkambig = checkambig
412 self._checkambig = checkambig
413 self._mmaplargeindex = mmaplargeindex
413 self._mmaplargeindex = mmaplargeindex
414 self._censorable = censorable
414 self._censorable = censorable
415 # 3-tuple of (node, rev, text) for a raw revision.
415 # 3-tuple of (node, rev, text) for a raw revision.
416 self._revisioncache = None
416 self._revisioncache = None
417 # Maps rev to chain base rev.
417 # Maps rev to chain base rev.
418 self._chainbasecache = util.lrucachedict(100)
418 self._chainbasecache = util.lrucachedict(100)
419 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
419 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
420 self._chunkcache = (0, b'')
420 self._chunkcache = (0, b'')
421 # How much data to read and cache into the raw revlog data cache.
421 # How much data to read and cache into the raw revlog data cache.
422 self._chunkcachesize = 65536
422 self._chunkcachesize = 65536
423 self._maxchainlen = None
423 self._maxchainlen = None
424 self._deltabothparents = True
424 self._deltabothparents = True
425 self.index = None
425 self.index = None
426 self._docket = None
426 self._docket = None
427 self._nodemap_docket = None
427 self._nodemap_docket = None
428 # Mapping of partial identifiers to full nodes.
428 # Mapping of partial identifiers to full nodes.
429 self._pcache = {}
429 self._pcache = {}
430 # Mapping of revision integer to full node.
430 # Mapping of revision integer to full node.
431 self._compengine = b'zlib'
431 self._compengine = b'zlib'
432 self._compengineopts = {}
432 self._compengineopts = {}
433 self._maxdeltachainspan = -1
433 self._maxdeltachainspan = -1
434 self._withsparseread = False
434 self._withsparseread = False
435 self._sparserevlog = False
435 self._sparserevlog = False
436 self.hassidedata = False
436 self.hassidedata = False
437 self._srdensitythreshold = 0.50
437 self._srdensitythreshold = 0.50
438 self._srmingapsize = 262144
438 self._srmingapsize = 262144
439
439
440 # Make copy of flag processors so each revlog instance can support
440 # Make copy of flag processors so each revlog instance can support
441 # custom flags.
441 # custom flags.
442 self._flagprocessors = dict(flagutil.flagprocessors)
442 self._flagprocessors = dict(flagutil.flagprocessors)
443
443
444 # 2-tuple of file handles being used for active writing.
444 # 2-tuple of file handles being used for active writing.
445 self._writinghandles = None
445 self._writinghandles = None
446 # prevent nesting of addgroup
446 # prevent nesting of addgroup
447 self._adding_group = None
447 self._adding_group = None
448
448
449 self._loadindex()
449 self._loadindex()
450
450
451 self._concurrencychecker = concurrencychecker
451 self._concurrencychecker = concurrencychecker
452
452
453 def _init_opts(self):
453 def _init_opts(self):
454 """process options (from above/config) to setup associated default revlog mode
454 """process options (from above/config) to setup associated default revlog mode
455
455
456 These values might be affected when actually reading on disk information.
456 These values might be affected when actually reading on disk information.
457
457
458 The relevant values are returned for use in _loadindex().
458 The relevant values are returned for use in _loadindex().
459
459
460 * newversionflags:
460 * newversionflags:
461 version header to use if we need to create a new revlog
461 version header to use if we need to create a new revlog
462
462
463 * mmapindexthreshold:
463 * mmapindexthreshold:
464 minimal index size for start to use mmap
464 minimal index size for start to use mmap
465
465
466 * force_nodemap:
466 * force_nodemap:
467 force the usage of a "development" version of the nodemap code
467 force the usage of a "development" version of the nodemap code
468 """
468 """
469 mmapindexthreshold = None
469 mmapindexthreshold = None
470 opts = self.opener.options
470 opts = self.opener.options
471
471
472 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
472 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
473 new_header = CHANGELOGV2
473 new_header = CHANGELOGV2
474 elif b'revlogv2' in opts:
474 elif b'revlogv2' in opts:
475 new_header = REVLOGV2
475 new_header = REVLOGV2
476 elif b'revlogv1' in opts:
476 elif b'revlogv1' in opts:
477 new_header = REVLOGV1 | FLAG_INLINE_DATA
477 new_header = REVLOGV1 | FLAG_INLINE_DATA
478 if b'generaldelta' in opts:
478 if b'generaldelta' in opts:
479 new_header |= FLAG_GENERALDELTA
479 new_header |= FLAG_GENERALDELTA
480 elif b'revlogv0' in self.opener.options:
480 elif b'revlogv0' in self.opener.options:
481 new_header = REVLOGV0
481 new_header = REVLOGV0
482 else:
482 else:
483 new_header = REVLOG_DEFAULT_VERSION
483 new_header = REVLOG_DEFAULT_VERSION
484
484
485 if b'chunkcachesize' in opts:
485 if b'chunkcachesize' in opts:
486 self._chunkcachesize = opts[b'chunkcachesize']
486 self._chunkcachesize = opts[b'chunkcachesize']
487 if b'maxchainlen' in opts:
487 if b'maxchainlen' in opts:
488 self._maxchainlen = opts[b'maxchainlen']
488 self._maxchainlen = opts[b'maxchainlen']
489 if b'deltabothparents' in opts:
489 if b'deltabothparents' in opts:
490 self._deltabothparents = opts[b'deltabothparents']
490 self._deltabothparents = opts[b'deltabothparents']
491 self._lazydelta = bool(opts.get(b'lazydelta', True))
491 self._lazydelta = bool(opts.get(b'lazydelta', True))
492 self._lazydeltabase = False
492 self._lazydeltabase = False
493 if self._lazydelta:
493 if self._lazydelta:
494 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
494 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
495 if b'compengine' in opts:
495 if b'compengine' in opts:
496 self._compengine = opts[b'compengine']
496 self._compengine = opts[b'compengine']
497 if b'zlib.level' in opts:
497 if b'zlib.level' in opts:
498 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
498 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
499 if b'zstd.level' in opts:
499 if b'zstd.level' in opts:
500 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
500 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
501 if b'maxdeltachainspan' in opts:
501 if b'maxdeltachainspan' in opts:
502 self._maxdeltachainspan = opts[b'maxdeltachainspan']
502 self._maxdeltachainspan = opts[b'maxdeltachainspan']
503 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
503 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
504 mmapindexthreshold = opts[b'mmapindexthreshold']
504 mmapindexthreshold = opts[b'mmapindexthreshold']
505 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
505 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
506 withsparseread = bool(opts.get(b'with-sparse-read', False))
506 withsparseread = bool(opts.get(b'with-sparse-read', False))
507 # sparse-revlog forces sparse-read
507 # sparse-revlog forces sparse-read
508 self._withsparseread = self._sparserevlog or withsparseread
508 self._withsparseread = self._sparserevlog or withsparseread
509 if b'sparse-read-density-threshold' in opts:
509 if b'sparse-read-density-threshold' in opts:
510 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
510 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
511 if b'sparse-read-min-gap-size' in opts:
511 if b'sparse-read-min-gap-size' in opts:
512 self._srmingapsize = opts[b'sparse-read-min-gap-size']
512 self._srmingapsize = opts[b'sparse-read-min-gap-size']
513 if opts.get(b'enableellipsis'):
513 if opts.get(b'enableellipsis'):
514 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
514 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
515
515
516 # revlog v0 doesn't have flag processors
516 # revlog v0 doesn't have flag processors
517 for flag, processor in pycompat.iteritems(
517 for flag, processor in pycompat.iteritems(
518 opts.get(b'flagprocessors', {})
518 opts.get(b'flagprocessors', {})
519 ):
519 ):
520 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
520 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
521
521
522 if self._chunkcachesize <= 0:
522 if self._chunkcachesize <= 0:
523 raise error.RevlogError(
523 raise error.RevlogError(
524 _(b'revlog chunk cache size %r is not greater than 0')
524 _(b'revlog chunk cache size %r is not greater than 0')
525 % self._chunkcachesize
525 % self._chunkcachesize
526 )
526 )
527 elif self._chunkcachesize & (self._chunkcachesize - 1):
527 elif self._chunkcachesize & (self._chunkcachesize - 1):
528 raise error.RevlogError(
528 raise error.RevlogError(
529 _(b'revlog chunk cache size %r is not a power of 2')
529 _(b'revlog chunk cache size %r is not a power of 2')
530 % self._chunkcachesize
530 % self._chunkcachesize
531 )
531 )
532 force_nodemap = opts.get(b'devel-force-nodemap', False)
532 force_nodemap = opts.get(b'devel-force-nodemap', False)
533 return new_header, mmapindexthreshold, force_nodemap
533 return new_header, mmapindexthreshold, force_nodemap
534
534
535 def _get_data(self, filepath, mmap_threshold, size=None):
535 def _get_data(self, filepath, mmap_threshold, size=None):
536 """return a file content with or without mmap
536 """return a file content with or without mmap
537
537
538 If the file is missing return the empty string"""
538 If the file is missing return the empty string"""
539 try:
539 try:
540 with self.opener(filepath) as fp:
540 with self.opener(filepath) as fp:
541 if mmap_threshold is not None:
541 if mmap_threshold is not None:
542 file_size = self.opener.fstat(fp).st_size
542 file_size = self.opener.fstat(fp).st_size
543 if file_size >= mmap_threshold:
543 if file_size >= mmap_threshold:
544 if size is not None:
544 if size is not None:
545 # avoid potentiel mmap crash
545 # avoid potentiel mmap crash
546 size = min(file_size, size)
546 size = min(file_size, size)
547 # TODO: should .close() to release resources without
547 # TODO: should .close() to release resources without
548 # relying on Python GC
548 # relying on Python GC
549 if size is None:
549 if size is None:
550 return util.buffer(util.mmapread(fp))
550 return util.buffer(util.mmapread(fp))
551 else:
551 else:
552 return util.buffer(util.mmapread(fp, size))
552 return util.buffer(util.mmapread(fp, size))
553 if size is None:
553 if size is None:
554 return fp.read()
554 return fp.read()
555 else:
555 else:
556 return fp.read(size)
556 return fp.read(size)
557 except IOError as inst:
557 except IOError as inst:
558 if inst.errno != errno.ENOENT:
558 if inst.errno != errno.ENOENT:
559 raise
559 raise
560 return b''
560 return b''
561
561
def _loadindex(self):
    """Read and parse the on-disk index (or docket) for this revlog.

    Determines the entry-point file, decodes the format header,
    validates version/flags, resolves index and data file names, picks
    the right index parser and finally installs ``self.index`` and the
    chunk cache.  Raises ``error.RevlogError`` on unknown or corrupt
    on-disk content.
    """

    new_header, mmapindexthreshold, force_nodemap = self._init_opts()

    # pick the file that acts as entry point: an explicit postfix wins,
    # then a pending (transaction) index if we were asked to try it,
    # otherwise the plain index file.
    if self.postfix is not None:
        entry_point = b'%s.i.%s' % (self.radix, self.postfix)
    elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
        entry_point = b'%s.i.a' % self.radix
    else:
        entry_point = b'%s.i' % self.radix

    entry_data = b''
    self._initempty = True
    entry_data = self._get_data(entry_point, mmapindexthreshold)
    if len(entry_data) > 0:
        # first 4 bytes carry the format flags + version
        header = INDEX_HEADER.unpack(entry_data[:4])[0]
        self._initempty = False
    else:
        # empty/missing file: fall back to the configured default header
        header = new_header

    # high 16 bits are feature flags, low 16 bits the format version
    self._format_flags = header & ~0xFFFF
    self._format_version = header & 0xFFFF

    supported_flags = SUPPORTED_FLAGS.get(self._format_version)
    if supported_flags is None:
        msg = _(b'unknown version (%d) in revlog %s')
        msg %= (self._format_version, self.display_id)
        raise error.RevlogError(msg)
    elif self._format_flags & ~supported_flags:
        msg = _(b'unknown flags (%#04x) in version %d revlog %s')
        display_flag = self._format_flags >> 16
        msg %= (display_flag, self._format_version, self.display_id)
        raise error.RevlogError(msg)

    # derive per-version feature switches from the header
    features = FEATURES_BY_VERSION[self._format_version]
    self._inline = features[b'inline'](self._format_flags)
    self._generaldelta = features[b'generaldelta'](self._format_flags)
    self.hassidedata = features[b'sidedata']

    if not features[b'docket']:
        # classic layout: the entry point *is* the index
        self._indexfile = entry_point
        index_data = entry_data
    else:
        # docket layout: the entry point is a small docket that points
        # at the actual index/data files and records their valid sizes
        self._docket_file = entry_point
        if self._initempty:
            self._docket = docketutil.default_docket(self, header)
        else:
            self._docket = docketutil.parse_docket(
                self, entry_data, use_pending=self._trypending
            )
        self._indexfile = self._docket.index_filepath()
        index_data = b''
        index_size = self._docket.index_end
        if index_size > 0:
            # only read up to the docket-recorded end; trailing bytes
            # past it are not (yet) valid data
            index_data = self._get_data(
                self._indexfile, mmapindexthreshold, size=index_size
            )
            if len(index_data) < index_size:
                msg = _(b'too few index data for %s: got %d, expected %d')
                msg %= (self.display_id, len(index_data), index_size)
                raise error.RevlogError(msg)

        self._inline = False
        # generaldelta implied by version 2 revlogs.
        self._generaldelta = True
        # the logic for persistent nodemap will be dealt with within the
        # main docket, so disable it for now.
        self._nodemap_file = None

    # resolve the data file name (docket knows it; otherwise derive it
    # from radix and optional postfix)
    if self._docket is not None:
        self._datafile = self._docket.data_filepath()
    elif self.postfix is None:
        self._datafile = b'%s.d' % self.radix
    else:
        self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

    self.nodeconstants = sha1nodeconstants
    self.nullid = self.nodeconstants.nullid

    # sparse-revlog can't be on without general-delta (issue6056)
    if not self._generaldelta:
        self._sparserevlog = False

    self._storedeltachains = True

    # developer knob: force the pure-python nodemap-aware parser
    devel_nodemap = (
        self._nodemap_file
        and force_nodemap
        and parse_index_v1_nodemap is not None
    )

    use_rust_index = False
    if rustrevlog is not None:
        if self._nodemap_file is not None:
            use_rust_index = True
        else:
            use_rust_index = self.opener.options.get(b'rust.index')

    # select the index parser matching the detected format version
    self._parse_index = parse_index_v1
    if self._format_version == REVLOGV0:
        self._parse_index = revlogv0.parse_index_v0
    elif self._format_version == REVLOGV2:
        self._parse_index = parse_index_v2
    elif self._format_version == CHANGELOGV2:
        self._parse_index = parse_index_cl_v2
    elif devel_nodemap:
        self._parse_index = parse_index_v1_nodemap
    elif use_rust_index:
        self._parse_index = parse_index_v1_mixed
    try:
        d = self._parse_index(index_data, self._inline)
        index, _chunkcache = d
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                docket = nodemap_data[0]
                # only trust the persisted nodemap if its recorded tip
                # still matches the index content
                if (
                    len(d[0]) > docket.tip_rev
                    and d[0][docket.tip_rev][7] == docket.tip_node
                ):
                    # no changelog tampering
                    self._nodemap_docket = docket
                    index.update_nodemap_data(*nodemap_data)
    except (ValueError, IndexError):
        raise error.RevlogError(
            _(b"index %s is corrupted") % self.display_id
        )
    self.index, self._chunkcache = d
    if not self._chunkcache:
        self._chunkclear()
    # revnum -> (chain-length, sum-delta-length)
    self._chaininfocache = util.lrucachedict(500)
    # revlog header -> revlog compressor
    self._decompressors = {}
701
701
@util.propertycache
def revlog_kind(self):
    """The kind of data stored in this revlog (first field of target)."""
    kind = self.target[0]
    return kind
705
705
@util.propertycache
def display_id(self):
    """The public facing "ID" of the revlog that we use in message"""
    # Maybe we should build a user facing representation of
    # revlog.target instead of using `self.radix`
    display = self.radix
    return display
712
712
def _get_decompressor(self, t):
    """Return (and memoize) the decompressor for chunk header byte ``t``.

    Raises ``error.RevlogError`` when no compression engine claims the
    header.
    """
    if t in self._decompressors:
        return self._decompressors[t]
    try:
        engine = util.compengines.forrevlogheader(t)
        compressor = engine.revlogcompressor(self._compengineopts)
    except KeyError:
        raise error.RevlogError(
            _(b'unknown compression type %s') % binascii.hexlify(t)
        )
    self._decompressors[t] = compressor
    return compressor
726
726
@util.propertycache
def _compressor(self):
    """Compressor used when writing new chunks (built from config opts)."""
    engine = util.compengines[self._compengine]
    compressor = engine.revlogcompressor(self._compengineopts)
    return compressor
731
731
@util.propertycache
def _decompressor(self):
    """the default decompressor"""
    docket = self._docket
    if docket is None:
        # no docket: there is no single default compression header
        return None
    header = docket.default_compression_header
    return self._get_decompressor(header).decompress
740
740
def _indexfp(self):
    """file object for the revlog's index file"""
    index_file = self._indexfile
    return self.opener(index_file, mode=b"r")
744
744
def __index_write_fp(self):
    # You should not use this directly and use `_writing` instead
    try:
        f = self.opener(
            self._indexfile, mode=b"r+", checkambig=self._checkambig
        )
        if self._docket is None:
            # no docket: new index entries always go at the very end
            f.seek(0, os.SEEK_END)
        else:
            # the docket records how much of the index file holds valid
            # data; position right after that point so stale trailing
            # bytes get overwritten
            f.seek(self._docket.index_end, os.SEEK_SET)
        return f
    except IOError as inst:
        if inst.errno != errno.ENOENT:
            raise
        # the index file does not exist yet: create it from scratch
        return self.opener(
            self._indexfile, mode=b"w+", checkambig=self._checkambig
        )
762
762
def __index_new_fp(self):
    """Open a brand new index file for writing (atomic temp file).

    You should not use this unless you are upgrading from an inline
    revlog.
    """
    open_kwargs = {
        'mode': b"w",
        'checkambig': self._checkambig,
        'atomictemp': True,
    }
    return self.opener(self._indexfile, **open_kwargs)
771
771
def _datafp(self, mode=b'r'):
    """file object for the revlog's data file"""
    data_file = self._datafile
    return self.opener(data_file, mode=mode)
775
775
@contextlib.contextmanager
def _datareadfp(self, existingfp=None):
    """file object suitable to read data"""
    if existingfp is not None:
        # Use explicit file handle, if given.
        yield existingfp
    elif self._writinghandles:
        # Use a file handle being actively used for writes, if
        # available.  There is some danger to doing this because reads
        # will seek the file.  However, _writeentry() performs a
        # SEEK_END before all writes, so we should be safe.
        if self._inline:
            yield self._writinghandles[0]
        else:
            yield self._writinghandles[1]
    else:
        # Otherwise open a new file handle.
        open_func = self._indexfp if self._inline else self._datafp
        with open_func() as fp:
            yield fp
801
801
def tiprev(self):
    """Revision number of the tip (last) revision; -1 when empty."""
    index_length = len(self.index)
    return index_length - 1
804
804
def tip(self):
    """Node id of the tip revision."""
    tip_rev = self.tiprev()
    return self.node(tip_rev)
807
807
def __contains__(self, rev):
    """True when ``rev`` is a valid revision number for this revlog."""
    return rev >= 0 and rev < len(self)
810
810
def __len__(self):
    """Number of revisions stored in this revlog."""
    index = self.index
    return len(index)
813
813
def __iter__(self):
    """Iterate over all revision numbers, in ascending order."""
    total = len(self)
    return iter(pycompat.xrange(total))
816
816
def revs(self, start=0, stop=None):
    """iterate over all rev in this revlog (from start to stop)"""
    total = len(self)
    return storageutil.iterrevs(total, start=start, stop=stop)
820
820
@property
def nodemap(self):
    # Deprecated accessor kept for backward compatibility: the
    # node -> rev mapping now lives on the index object itself.
    msg = (
        b"revlog.nodemap is deprecated, "
        b"use revlog.index.[has_node|rev|get_rev]"
    )
    util.nouideprecwarn(msg, b'5.3', stacklevel=2)
    return self.index.nodemap
829
829
@property
def _nodecache(self):
    # Deprecated accessor kept for backward compatibility; use
    # revlog.index.nodemap instead.
    msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
    util.nouideprecwarn(msg, b'5.3', stacklevel=2)
    return self.index.nodemap
835
835
def hasnode(self, node):
    """Return True when ``node`` is known to this revlog."""
    try:
        self.rev(node)
    except KeyError:
        return False
    return True
842
842
def candelta(self, baserev, rev):
    """whether two revisions (baserev, rev) can be delta-ed or not"""
    # Disable delta if either rev requires a content-changing flag
    # processor (ex. LFS).  Such a processor can alter the rawtext that
    # the delta is based on; two clients could then hold the same node
    # with different flags (i.e. different rawtexts), making the delta
    # incompatible.
    if self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
        return False
    if self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
        return False
    return True
855
855
def update_caches(self, transaction):
    """Refresh on-disk caches (the persistent nodemap) if configured."""
    if self._nodemap_file is None:
        return
    if transaction is None:
        nodemaputil.update_persistent_nodemap(self)
    else:
        nodemaputil.setup_persistent_nodemap(transaction, self)
862
862
def clearcaches(self):
    """Forget all in-memory caches derived from the index and revisions."""
    # cached (node, rev, rawtext) of the most recently accessed revision
    self._revisioncache = None
    self._chainbasecache.clear()
    # raw chunk cache: (offset, data)
    self._chunkcache = (0, b'')
    # partial-match lookup cache
    self._pcache = {}
    self._nodemap_docket = None
    self.index.clearcaches()
    # The python code is the one responsible for validating the docket, we
    # end up having to refresh it here.
    use_nodemap = (
        not self._inline
        and self._nodemap_file is not None
        and util.safehasattr(self.index, 'update_nodemap_data')
    )
    if use_nodemap:
        nodemap_data = nodemaputil.persisted_data(self)
        if nodemap_data is not None:
            self._nodemap_docket = nodemap_data[0]
            self.index.update_nodemap_data(*nodemap_data)
882
882
def rev(self, node):
    """Return the revision number of ``node``.

    Raises ``error.WdirUnsupported`` for the working-directory
    pseudo-nodes and ``error.LookupError`` for unknown nodes.
    """
    try:
        return self.index.rev(node)
    except TypeError:
        raise
    except error.RevlogError:
        # parsers.c radix tree lookup failed
        is_wdir = (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        )
        if is_wdir:
            raise error.WdirUnsupported
        raise error.LookupError(node, self.display_id, _(b'no node'))
896
896
897 # Accessors for index entries.
897 # Accessors for index entries.
898
898
899 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
899 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
900 # are flags.
900 # are flags.
def start(self, rev):
    """Offset in the data file where revision ``rev``'s chunk starts."""
    # first index-tuple field packs the offset (high 48 bits) together
    # with the flags (low 16 bits)
    offset_flags = self.index[rev][0]
    return int(offset_flags >> 16)
903
903
def flags(self, rev):
    """Flag bits stored for revision ``rev`` (low 16 bits of field 0)."""
    offset_flags = self.index[rev][0]
    return offset_flags & 0xFFFF
906
906
def length(self, rev):
    """Length of the stored (compressed) chunk for revision ``rev``."""
    entry = self.index[rev]
    return entry[1]
909
909
def sidedata_length(self, rev):
    """Length of the sidedata for ``rev`` (0 when sidedata is unsupported)."""
    if not self.hassidedata:
        return 0
    entry = self.index[rev]
    return entry[9]
914
914
def rawsize(self, rev):
    """return the length of the uncompressed text for a given revision"""
    stored = self.index[rev][2]
    if stored >= 0:
        return stored
    # a negative stored size means it must be recomputed from the
    # rawtext itself
    return len(self.rawdata(rev))
923
923
def size(self, rev):
    """length of non-raw text (processed by a "read" flag processor)"""
    # fast path: if no "read" flag processor could change the content,
    # size is rawsize. note: ELLIPSIS is known to not change the content.
    flags = self.flags(rev)
    if not (flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS)):
        return self.rawsize(rev)
    return len(self.revision(rev, raw=False))
933
933
def chainbase(self, rev):
    """Return the delta-chain base revision for ``rev`` (memoized)."""
    cached = self._chainbasecache.get(rev)
    if cached is not None:
        return cached

    index = self.index
    current = rev
    base = index[current][3]
    # walk bases until a revision is its own base (a full snapshot)
    while base != current:
        current = base
        base = index[current][3]

    self._chainbasecache[rev] = base
    return base
948
948
def linkrev(self, rev):
    """Changelog revision this revlog revision is linked to."""
    entry = self.index[rev]
    return entry[4]
951
951
def parentrevs(self, rev):
    """Return the parent revisions of ``rev`` as a 2-tuple.

    A null p1 is reported second, so the non-null parent (if any)
    always comes first.  Raises ``error.WdirUnsupported`` for the
    working-directory pseudo revision.
    """
    try:
        entry = self.index[rev]
    except IndexError:
        if rev == wdirrev:
            raise error.WdirUnsupported
        raise
    p1, p2 = entry[5], entry[6]
    if p1 == nullrev:
        return p2, p1
    return p1, p2

# fast parentrevs(rev) where rev isn't filtered
_uncheckedparentrevs = parentrevs
966
966
def node(self, rev):
    """Return the node id for revision ``rev``."""
    index = self.index
    try:
        return index[rev][7]
    except IndexError:
        if rev != wdirrev:
            raise
        raise error.WdirUnsupported
974
974
975 # Derived from index values.
975 # Derived from index values.
976
976
def end(self, rev):
    """Offset just past revision ``rev``'s chunk in the data file."""
    offset = self.start(rev)
    return offset + self.length(rev)
979
979
def parents(self, node):
    """Return the parent nodes of ``node`` as a 2-tuple.

    Mirrors ``parentrevs``: a null p1 is reported second, so the
    non-null parent (if any) always comes first.
    """
    i = self.index
    d = i[self.rev(node)]
    # inline node() to avoid function call overhead
    if d[5] == nullrev:
        # d[5] is a parent *revision number*, so it must be compared
        # against nullrev.  The previous comparison against self.nullid
        # (a node, i.e. bytes) could never be true, so the null-p1
        # reordering branch was unreachable.
        return i[d[6]][7], i[d[5]][7]
    else:
        return i[d[5]][7], i[d[6]][7]
988
988
def chainlen(self, rev):
    """Length of the delta chain needed to reconstruct ``rev``."""
    chain_len, _compressed_total = self._chaininfo(rev)
    return chain_len
991
991
def _chaininfo(self, rev):
    """Return ``(chain_length, sum_of_compressed_delta_sizes)`` for ``rev``.

    Results are memoized in ``self._chaininfocache``; hitting a cached
    entry for any revision along the chain short-circuits the walk.
    """
    chaininfocache = self._chaininfocache
    if rev in chaininfocache:
        return chaininfocache[rev]
    index = self.index
    generaldelta = self._generaldelta
    iterrev = rev
    e = index[iterrev]
    clen = 0
    compresseddeltalen = 0
    # e[3] is the delta base; the chain ends when a revision is its own
    # base (i.e. it is stored as a full snapshot)
    while iterrev != e[3]:
        clen += 1
        compresseddeltalen += e[1]
        if generaldelta:
            iterrev = e[3]
        else:
            # without general delta the base is always the previous rev
            iterrev -= 1
        if iterrev in chaininfocache:
            # reuse the already-computed tail of the chain
            t = chaininfocache[iterrev]
            clen += t[0]
            compresseddeltalen += t[1]
            break
        e = index[iterrev]
    else:
        # Add text length of base since decompressing that also takes
        # work. For cache hits the length is already included.
        compresseddeltalen += e[1]
    r = (clen, compresseddeltalen)
    chaininfocache[rev] = r
    return r
1022
1022
def _deltachain(self, rev, stoprev=None):
    """Obtain the delta chain for a revision.

    ``stoprev`` specifies a revision to stop at. If not specified, we
    stop at the base of the chain.

    Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
    revs in ascending order and ``stopped`` is a bool indicating whether
    ``stoprev`` was hit.
    """
    # Try C implementation.
    try:
        return self.index.deltachain(rev, stoprev, self._generaldelta)
    except AttributeError:
        # pure-python index: fall back to the loop below
        pass

    chain = []

    # Alias to prevent attribute lookup in tight loop.
    index = self.index
    generaldelta = self._generaldelta

    iterrev = rev
    e = index[iterrev]
    # e[3] is the delta base; a revision that is its own base is a full
    # snapshot and terminates the chain
    while iterrev != e[3] and iterrev != stoprev:
        chain.append(iterrev)
        if generaldelta:
            iterrev = e[3]
        else:
            # without general delta the base is always the previous rev
            iterrev -= 1
        e = index[iterrev]

    if iterrev == stoprev:
        stopped = True
    else:
        # the walk ended on a full snapshot, which is part of the chain
        chain.append(iterrev)
        stopped = False

    # the loop collected revs base-last; callers expect ascending order
    chain.reverse()
    return chain, stopped
1063
1063
1064 def ancestors(self, revs, stoprev=0, inclusive=False):
1064 def ancestors(self, revs, stoprev=0, inclusive=False):
1065 """Generate the ancestors of 'revs' in reverse revision order.
1065 """Generate the ancestors of 'revs' in reverse revision order.
1066 Does not generate revs lower than stoprev.
1066 Does not generate revs lower than stoprev.
1067
1067
1068 See the documentation for ancestor.lazyancestors for more details."""
1068 See the documentation for ancestor.lazyancestors for more details."""
1069
1069
1070 # first, make sure start revisions aren't filtered
1070 # first, make sure start revisions aren't filtered
1071 revs = list(revs)
1071 revs = list(revs)
1072 checkrev = self.node
1072 checkrev = self.node
1073 for r in revs:
1073 for r in revs:
1074 checkrev(r)
1074 checkrev(r)
1075 # and we're sure ancestors aren't filtered as well
1075 # and we're sure ancestors aren't filtered as well
1076
1076
1077 if rustancestor is not None and self.index.rust_ext_compat:
1077 if rustancestor is not None and self.index.rust_ext_compat:
1078 lazyancestors = rustancestor.LazyAncestors
1078 lazyancestors = rustancestor.LazyAncestors
1079 arg = self.index
1079 arg = self.index
1080 else:
1080 else:
1081 lazyancestors = ancestor.lazyancestors
1081 lazyancestors = ancestor.lazyancestors
1082 arg = self._uncheckedparentrevs
1082 arg = self._uncheckedparentrevs
1083 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1083 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1084
1084
1085 def descendants(self, revs):
1085 def descendants(self, revs):
1086 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1086 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1087
1087
1088 def findcommonmissing(self, common=None, heads=None):
1088 def findcommonmissing(self, common=None, heads=None):
1089 """Return a tuple of the ancestors of common and the ancestors of heads
1089 """Return a tuple of the ancestors of common and the ancestors of heads
1090 that are not ancestors of common. In revset terminology, we return the
1090 that are not ancestors of common. In revset terminology, we return the
1091 tuple:
1091 tuple:
1092
1092
1093 ::common, (::heads) - (::common)
1093 ::common, (::heads) - (::common)
1094
1094
1095 The list is sorted by revision number, meaning it is
1095 The list is sorted by revision number, meaning it is
1096 topologically sorted.
1096 topologically sorted.
1097
1097
1098 'heads' and 'common' are both lists of node IDs. If heads is
1098 'heads' and 'common' are both lists of node IDs. If heads is
1099 not supplied, uses all of the revlog's heads. If common is not
1099 not supplied, uses all of the revlog's heads. If common is not
1100 supplied, uses nullid."""
1100 supplied, uses nullid."""
1101 if common is None:
1101 if common is None:
1102 common = [self.nullid]
1102 common = [self.nullid]
1103 if heads is None:
1103 if heads is None:
1104 heads = self.heads()
1104 heads = self.heads()
1105
1105
1106 common = [self.rev(n) for n in common]
1106 common = [self.rev(n) for n in common]
1107 heads = [self.rev(n) for n in heads]
1107 heads = [self.rev(n) for n in heads]
1108
1108
1109 # we want the ancestors, but inclusive
1109 # we want the ancestors, but inclusive
1110 class lazyset(object):
1110 class lazyset(object):
1111 def __init__(self, lazyvalues):
1111 def __init__(self, lazyvalues):
1112 self.addedvalues = set()
1112 self.addedvalues = set()
1113 self.lazyvalues = lazyvalues
1113 self.lazyvalues = lazyvalues
1114
1114
1115 def __contains__(self, value):
1115 def __contains__(self, value):
1116 return value in self.addedvalues or value in self.lazyvalues
1116 return value in self.addedvalues or value in self.lazyvalues
1117
1117
1118 def __iter__(self):
1118 def __iter__(self):
1119 added = self.addedvalues
1119 added = self.addedvalues
1120 for r in added:
1120 for r in added:
1121 yield r
1121 yield r
1122 for r in self.lazyvalues:
1122 for r in self.lazyvalues:
1123 if not r in added:
1123 if not r in added:
1124 yield r
1124 yield r
1125
1125
1126 def add(self, value):
1126 def add(self, value):
1127 self.addedvalues.add(value)
1127 self.addedvalues.add(value)
1128
1128
1129 def update(self, values):
1129 def update(self, values):
1130 self.addedvalues.update(values)
1130 self.addedvalues.update(values)
1131
1131
1132 has = lazyset(self.ancestors(common))
1132 has = lazyset(self.ancestors(common))
1133 has.add(nullrev)
1133 has.add(nullrev)
1134 has.update(common)
1134 has.update(common)
1135
1135
1136 # take all ancestors from heads that aren't in has
1136 # take all ancestors from heads that aren't in has
1137 missing = set()
1137 missing = set()
1138 visit = collections.deque(r for r in heads if r not in has)
1138 visit = collections.deque(r for r in heads if r not in has)
1139 while visit:
1139 while visit:
1140 r = visit.popleft()
1140 r = visit.popleft()
1141 if r in missing:
1141 if r in missing:
1142 continue
1142 continue
1143 else:
1143 else:
1144 missing.add(r)
1144 missing.add(r)
1145 for p in self.parentrevs(r):
1145 for p in self.parentrevs(r):
1146 if p not in has:
1146 if p not in has:
1147 visit.append(p)
1147 visit.append(p)
1148 missing = list(missing)
1148 missing = list(missing)
1149 missing.sort()
1149 missing.sort()
1150 return has, [self.node(miss) for miss in missing]
1150 return has, [self.node(miss) for miss in missing]
1151
1151
1152 def incrementalmissingrevs(self, common=None):
1152 def incrementalmissingrevs(self, common=None):
1153 """Return an object that can be used to incrementally compute the
1153 """Return an object that can be used to incrementally compute the
1154 revision numbers of the ancestors of arbitrary sets that are not
1154 revision numbers of the ancestors of arbitrary sets that are not
1155 ancestors of common. This is an ancestor.incrementalmissingancestors
1155 ancestors of common. This is an ancestor.incrementalmissingancestors
1156 object.
1156 object.
1157
1157
1158 'common' is a list of revision numbers. If common is not supplied, uses
1158 'common' is a list of revision numbers. If common is not supplied, uses
1159 nullrev.
1159 nullrev.
1160 """
1160 """
1161 if common is None:
1161 if common is None:
1162 common = [nullrev]
1162 common = [nullrev]
1163
1163
1164 if rustancestor is not None and self.index.rust_ext_compat:
1164 if rustancestor is not None and self.index.rust_ext_compat:
1165 return rustancestor.MissingAncestors(self.index, common)
1165 return rustancestor.MissingAncestors(self.index, common)
1166 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1166 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1167
1167
1168 def findmissingrevs(self, common=None, heads=None):
1168 def findmissingrevs(self, common=None, heads=None):
1169 """Return the revision numbers of the ancestors of heads that
1169 """Return the revision numbers of the ancestors of heads that
1170 are not ancestors of common.
1170 are not ancestors of common.
1171
1171
1172 More specifically, return a list of revision numbers corresponding to
1172 More specifically, return a list of revision numbers corresponding to
1173 nodes N such that every N satisfies the following constraints:
1173 nodes N such that every N satisfies the following constraints:
1174
1174
1175 1. N is an ancestor of some node in 'heads'
1175 1. N is an ancestor of some node in 'heads'
1176 2. N is not an ancestor of any node in 'common'
1176 2. N is not an ancestor of any node in 'common'
1177
1177
1178 The list is sorted by revision number, meaning it is
1178 The list is sorted by revision number, meaning it is
1179 topologically sorted.
1179 topologically sorted.
1180
1180
1181 'heads' and 'common' are both lists of revision numbers. If heads is
1181 'heads' and 'common' are both lists of revision numbers. If heads is
1182 not supplied, uses all of the revlog's heads. If common is not
1182 not supplied, uses all of the revlog's heads. If common is not
1183 supplied, uses nullid."""
1183 supplied, uses nullid."""
1184 if common is None:
1184 if common is None:
1185 common = [nullrev]
1185 common = [nullrev]
1186 if heads is None:
1186 if heads is None:
1187 heads = self.headrevs()
1187 heads = self.headrevs()
1188
1188
1189 inc = self.incrementalmissingrevs(common=common)
1189 inc = self.incrementalmissingrevs(common=common)
1190 return inc.missingancestors(heads)
1190 return inc.missingancestors(heads)
1191
1191
1192 def findmissing(self, common=None, heads=None):
1192 def findmissing(self, common=None, heads=None):
1193 """Return the ancestors of heads that are not ancestors of common.
1193 """Return the ancestors of heads that are not ancestors of common.
1194
1194
1195 More specifically, return a list of nodes N such that every N
1195 More specifically, return a list of nodes N such that every N
1196 satisfies the following constraints:
1196 satisfies the following constraints:
1197
1197
1198 1. N is an ancestor of some node in 'heads'
1198 1. N is an ancestor of some node in 'heads'
1199 2. N is not an ancestor of any node in 'common'
1199 2. N is not an ancestor of any node in 'common'
1200
1200
1201 The list is sorted by revision number, meaning it is
1201 The list is sorted by revision number, meaning it is
1202 topologically sorted.
1202 topologically sorted.
1203
1203
1204 'heads' and 'common' are both lists of node IDs. If heads is
1204 'heads' and 'common' are both lists of node IDs. If heads is
1205 not supplied, uses all of the revlog's heads. If common is not
1205 not supplied, uses all of the revlog's heads. If common is not
1206 supplied, uses nullid."""
1206 supplied, uses nullid."""
1207 if common is None:
1207 if common is None:
1208 common = [self.nullid]
1208 common = [self.nullid]
1209 if heads is None:
1209 if heads is None:
1210 heads = self.heads()
1210 heads = self.heads()
1211
1211
1212 common = [self.rev(n) for n in common]
1212 common = [self.rev(n) for n in common]
1213 heads = [self.rev(n) for n in heads]
1213 heads = [self.rev(n) for n in heads]
1214
1214
1215 inc = self.incrementalmissingrevs(common=common)
1215 inc = self.incrementalmissingrevs(common=common)
1216 return [self.node(r) for r in inc.missingancestors(heads)]
1216 return [self.node(r) for r in inc.missingancestors(heads)]
1217
1217
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        # Canonical empty result, returned whenever the intersection is empty.
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants.  (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        # Keep only the heads actually reached/confirmed during the sweep.
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
1377
1377
1378 def headrevs(self, revs=None):
1378 def headrevs(self, revs=None):
1379 if revs is None:
1379 if revs is None:
1380 try:
1380 try:
1381 return self.index.headrevs()
1381 return self.index.headrevs()
1382 except AttributeError:
1382 except AttributeError:
1383 return self._headrevs()
1383 return self._headrevs()
1384 if rustdagop is not None and self.index.rust_ext_compat:
1384 if rustdagop is not None and self.index.rust_ext_compat:
1385 return rustdagop.headrevs(self.index, revs)
1385 return rustdagop.headrevs(self.index, revs)
1386 return dagop.headrevs(revs, self._uncheckedparentrevs)
1386 return dagop.headrevs(revs, self._uncheckedparentrevs)
1387
1387
1388 def computephases(self, roots):
1388 def computephases(self, roots):
1389 return self.index.computephasesmapsets(roots)
1389 return self.index.computephasesmapsets(roots)
1390
1390
1391 def _headrevs(self):
1391 def _headrevs(self):
1392 count = len(self)
1392 count = len(self)
1393 if not count:
1393 if not count:
1394 return [nullrev]
1394 return [nullrev]
1395 # we won't iter over filtered rev so nobody is a head at start
1395 # we won't iter over filtered rev so nobody is a head at start
1396 ishead = [0] * (count + 1)
1396 ishead = [0] * (count + 1)
1397 index = self.index
1397 index = self.index
1398 for r in self:
1398 for r in self:
1399 ishead[r] = 1 # I may be an head
1399 ishead[r] = 1 # I may be an head
1400 e = index[r]
1400 e = index[r]
1401 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1401 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1402 return [r for r, val in enumerate(ishead) if val]
1402 return [r for r, val in enumerate(ishead) if val]
1403
1403
1404 def heads(self, start=None, stop=None):
1404 def heads(self, start=None, stop=None):
1405 """return the list of all nodes that have no children
1405 """return the list of all nodes that have no children
1406
1406
1407 if start is specified, only heads that are descendants of
1407 if start is specified, only heads that are descendants of
1408 start will be returned
1408 start will be returned
1409 if stop is specified, it will consider all the revs from stop
1409 if stop is specified, it will consider all the revs from stop
1410 as if they had no children
1410 as if they had no children
1411 """
1411 """
1412 if start is None and stop is None:
1412 if start is None and stop is None:
1413 if not len(self):
1413 if not len(self):
1414 return [self.nullid]
1414 return [self.nullid]
1415 return [self.node(r) for r in self.headrevs()]
1415 return [self.node(r) for r in self.headrevs()]
1416
1416
1417 if start is None:
1417 if start is None:
1418 start = nullrev
1418 start = nullrev
1419 else:
1419 else:
1420 start = self.rev(start)
1420 start = self.rev(start)
1421
1421
1422 stoprevs = {self.rev(n) for n in stop or []}
1422 stoprevs = {self.rev(n) for n in stop or []}
1423
1423
1424 revs = dagop.headrevssubset(
1424 revs = dagop.headrevssubset(
1425 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1425 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1426 )
1426 )
1427
1427
1428 return [self.node(rev) for rev in revs]
1428 return [self.node(rev) for rev in revs]
1429
1429
1430 def children(self, node):
1430 def children(self, node):
1431 """find the children of a given node"""
1431 """find the children of a given node"""
1432 c = []
1432 c = []
1433 p = self.rev(node)
1433 p = self.rev(node)
1434 for r in self.revs(start=p + 1):
1434 for r in self.revs(start=p + 1):
1435 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1435 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1436 if prevs:
1436 if prevs:
1437 for pr in prevs:
1437 for pr in prevs:
1438 if pr == p:
1438 if pr == p:
1439 c.append(self.node(r))
1439 c.append(self.node(r))
1440 elif p == nullrev:
1440 elif p == nullrev:
1441 c.append(self.node(r))
1441 c.append(self.node(r))
1442 return c
1442 return c
1443
1443
1444 def commonancestorsheads(self, a, b):
1444 def commonancestorsheads(self, a, b):
1445 """calculate all the heads of the common ancestors of nodes a and b"""
1445 """calculate all the heads of the common ancestors of nodes a and b"""
1446 a, b = self.rev(a), self.rev(b)
1446 a, b = self.rev(a), self.rev(b)
1447 ancs = self._commonancestorsheads(a, b)
1447 ancs = self._commonancestorsheads(a, b)
1448 return pycompat.maplist(self.node, ancs)
1448 return pycompat.maplist(self.node, ancs)
1449
1449
1450 def _commonancestorsheads(self, *revs):
1450 def _commonancestorsheads(self, *revs):
1451 """calculate all the heads of the common ancestors of revs"""
1451 """calculate all the heads of the common ancestors of revs"""
1452 try:
1452 try:
1453 ancs = self.index.commonancestorsheads(*revs)
1453 ancs = self.index.commonancestorsheads(*revs)
1454 except (AttributeError, OverflowError): # C implementation failed
1454 except (AttributeError, OverflowError): # C implementation failed
1455 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1455 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1456 return ancs
1456 return ancs
1457
1457
1458 def isancestor(self, a, b):
1458 def isancestor(self, a, b):
1459 """return True if node a is an ancestor of node b
1459 """return True if node a is an ancestor of node b
1460
1460
1461 A revision is considered an ancestor of itself."""
1461 A revision is considered an ancestor of itself."""
1462 a, b = self.rev(a), self.rev(b)
1462 a, b = self.rev(a), self.rev(b)
1463 return self.isancestorrev(a, b)
1463 return self.isancestorrev(a, b)
1464
1464
1465 def isancestorrev(self, a, b):
1465 def isancestorrev(self, a, b):
1466 """return True if revision a is an ancestor of revision b
1466 """return True if revision a is an ancestor of revision b
1467
1467
1468 A revision is considered an ancestor of itself.
1468 A revision is considered an ancestor of itself.
1469
1469
1470 The implementation of this is trivial but the use of
1470 The implementation of this is trivial but the use of
1471 reachableroots is not."""
1471 reachableroots is not."""
1472 if a == nullrev:
1472 if a == nullrev:
1473 return True
1473 return True
1474 elif a == b:
1474 elif a == b:
1475 return True
1475 return True
1476 elif a > b:
1476 elif a > b:
1477 return False
1477 return False
1478 return bool(self.reachableroots(a, [b], [a], includepath=False))
1478 return bool(self.reachableroots(a, [b], [a], includepath=False))
1479
1479
1480 def reachableroots(self, minroot, heads, roots, includepath=False):
1480 def reachableroots(self, minroot, heads, roots, includepath=False):
1481 """return (heads(::(<roots> and <roots>::<heads>)))
1481 """return (heads(::(<roots> and <roots>::<heads>)))
1482
1482
1483 If includepath is True, return (<roots>::<heads>)."""
1483 If includepath is True, return (<roots>::<heads>)."""
1484 try:
1484 try:
1485 return self.index.reachableroots2(
1485 return self.index.reachableroots2(
1486 minroot, heads, roots, includepath
1486 minroot, heads, roots, includepath
1487 )
1487 )
1488 except AttributeError:
1488 except AttributeError:
1489 return dagop._reachablerootspure(
1489 return dagop._reachablerootspure(
1490 self.parentrevs, minroot, roots, heads, includepath
1490 self.parentrevs, minroot, roots, heads, includepath
1491 )
1491 )
1492
1492
1493 def ancestor(self, a, b):
1493 def ancestor(self, a, b):
1494 """calculate the "best" common ancestor of nodes a and b"""
1494 """calculate the "best" common ancestor of nodes a and b"""
1495
1495
1496 a, b = self.rev(a), self.rev(b)
1496 a, b = self.rev(a), self.rev(b)
1497 try:
1497 try:
1498 ancs = self.index.ancestors(a, b)
1498 ancs = self.index.ancestors(a, b)
1499 except (AttributeError, OverflowError):
1499 except (AttributeError, OverflowError):
1500 ancs = ancestor.ancestors(self.parentrevs, a, b)
1500 ancs = ancestor.ancestors(self.parentrevs, a, b)
1501 if ancs:
1501 if ancs:
1502 # choose a consistent winner when there's a tie
1502 # choose a consistent winner when there's a tie
1503 return min(map(self.node, ancs))
1503 return min(map(self.node, ancs))
1504 return self.nullid
1504 return self.nullid
1505
1505
1506 def _match(self, id):
1506 def _match(self, id):
1507 if isinstance(id, int):
1507 if isinstance(id, int):
1508 # rev
1508 # rev
1509 return self.node(id)
1509 return self.node(id)
1510 if len(id) == self.nodeconstants.nodelen:
1510 if len(id) == self.nodeconstants.nodelen:
1511 # possibly a binary node
1511 # possibly a binary node
1512 # odds of a binary node being all hex in ASCII are 1 in 10**25
1512 # odds of a binary node being all hex in ASCII are 1 in 10**25
1513 try:
1513 try:
1514 node = id
1514 node = id
1515 self.rev(node) # quick search the index
1515 self.rev(node) # quick search the index
1516 return node
1516 return node
1517 except error.LookupError:
1517 except error.LookupError:
1518 pass # may be partial hex id
1518 pass # may be partial hex id
1519 try:
1519 try:
1520 # str(rev)
1520 # str(rev)
1521 rev = int(id)
1521 rev = int(id)
1522 if b"%d" % rev != id:
1522 if b"%d" % rev != id:
1523 raise ValueError
1523 raise ValueError
1524 if rev < 0:
1524 if rev < 0:
1525 rev = len(self) + rev
1525 rev = len(self) + rev
1526 if rev < 0 or rev >= len(self):
1526 if rev < 0 or rev >= len(self):
1527 raise ValueError
1527 raise ValueError
1528 return self.node(rev)
1528 return self.node(rev)
1529 except (ValueError, OverflowError):
1529 except (ValueError, OverflowError):
1530 pass
1530 pass
1531 if len(id) == 2 * self.nodeconstants.nodelen:
1531 if len(id) == 2 * self.nodeconstants.nodelen:
1532 try:
1532 try:
1533 # a full hex nodeid?
1533 # a full hex nodeid?
1534 node = bin(id)
1534 node = bin(id)
1535 self.rev(node)
1535 self.rev(node)
1536 return node
1536 return node
1537 except (TypeError, error.LookupError):
1537 except (TypeError, error.LookupError):
1538 pass
1538 pass
1539
1539
    def _partialmatch(self, id):
        """Resolve a hex-prefix ``id`` to a binary node, or return None.

        Raises AmbiguousPrefixLookupError when several nodes share the
        prefix, and WdirUnsupported when the prefix can only denote the
        working-directory pseudo-node.
        """
        # we don't care wdirfilenodeids as they should be always full hash
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            # fast path: the C radix tree resolves the prefix directly
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        # cache of previously resolved prefixes
        if id in self._pcache:
            return self._pcache[id]

        # NOTE(review): 40 == 2 * sha1 nodelen; presumably hard-coded from
        # before variable-length hashes — confirm against nodeconstants.
        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                # e[7] is the binary node of each index entry
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                # re-check against the full (possibly odd-length) hex prefix
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass
1597
1597
1598 def lookup(self, id):
1598 def lookup(self, id):
1599 """locate a node based on:
1599 """locate a node based on:
1600 - revision number or str(revision number)
1600 - revision number or str(revision number)
1601 - nodeid or subset of hex nodeid
1601 - nodeid or subset of hex nodeid
1602 """
1602 """
1603 n = self._match(id)
1603 n = self._match(id)
1604 if n is not None:
1604 if n is not None:
1605 return n
1605 return n
1606 n = self._partialmatch(id)
1606 n = self._partialmatch(id)
1607 if n:
1607 if n:
1608 return n
1608 return n
1609
1609
1610 raise error.LookupError(id, self.display_id, _(b'no match found'))
1610 raise error.LookupError(id, self.display_id, _(b'no match found'))
1611
1611
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            # a prefix is valid when it resolves to exactly one node
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            # all-'f' prefixes could also denote the wdir pseudo-node
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            # grow the prefix until it can no longer be mistaken for wdir
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            # unfiltered: the index can compute the answer directly
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        # slow path: probe successively longer prefixes
        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)
1662
1662
1663 def cmp(self, node, text):
1663 def cmp(self, node, text):
1664 """compare text with a given file revision
1664 """compare text with a given file revision
1665
1665
1666 returns True if text is different than what is stored.
1666 returns True if text is different than what is stored.
1667 """
1667 """
1668 p1, p2 = self.parents(node)
1668 p1, p2 = self.parents(node)
1669 return storageutil.hashrevisionsha1(text, p1, p2) != node
1669 return storageutil.hashrevisionsha1(text, p1, p2) != node
1670
1670
1671 def _cachesegment(self, offset, data):
1671 def _cachesegment(self, offset, data):
1672 """Add a segment to the revlog cache.
1672 """Add a segment to the revlog cache.
1673
1673
1674 Accepts an absolute offset and the data that is at that location.
1674 Accepts an absolute offset and the data that is at that location.
1675 """
1675 """
1676 o, d = self._chunkcache
1676 o, d = self._chunkcache
1677 # try to add to existing cache
1677 # try to add to existing cache
1678 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1678 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1679 self._chunkcache = o, d + data
1679 self._chunkcache = o, d + data
1680 else:
1680 else:
1681 self._chunkcache = offset, data
1681 self._chunkcache = offset, data
1682
1682
    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        # align the window to cachesize boundaries; ~(cachesize - 1) masks
        # the low bits (cachesize is presumably a power of two — the mask
        # only works for one)
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        # remember the whole aligned window for future reads
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            # the aligned window differs from the request: slice the
            # requested bytes back out of it
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self._indexfile if self._inline else self._datafile,
                        length,
                        offset,
                        len(d) - startoffset,
                    )
                )

            # zero-copy view into the cached window
            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self._indexfile if self._inline else self._datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d
1742
1742
1743 def _getsegment(self, offset, length, df=None):
1743 def _getsegment(self, offset, length, df=None):
1744 """Obtain a segment of raw data from the revlog.
1744 """Obtain a segment of raw data from the revlog.
1745
1745
1746 Accepts an absolute offset, length of bytes to obtain, and an
1746 Accepts an absolute offset, length of bytes to obtain, and an
1747 optional file handle to the already-opened revlog. If the file
1747 optional file handle to the already-opened revlog. If the file
1748 handle is used, it's original seek position will not be preserved.
1748 handle is used, it's original seek position will not be preserved.
1749
1749
1750 Requests for data may be returned from a cache.
1750 Requests for data may be returned from a cache.
1751
1751
1752 Returns a str or a buffer instance of raw byte data.
1752 Returns a str or a buffer instance of raw byte data.
1753 """
1753 """
1754 o, d = self._chunkcache
1754 o, d = self._chunkcache
1755 l = len(d)
1755 l = len(d)
1756
1756
1757 # is it in the cache?
1757 # is it in the cache?
1758 cachestart = offset - o
1758 cachestart = offset - o
1759 cacheend = cachestart + length
1759 cacheend = cachestart + length
1760 if cachestart >= 0 and cacheend <= l:
1760 if cachestart >= 0 and cacheend <= l:
1761 if cachestart == 0 and cacheend == l:
1761 if cachestart == 0 and cacheend == l:
1762 return d # avoid a copy
1762 return d # avoid a copy
1763 return util.buffer(d, cachestart, cacheend - cachestart)
1763 return util.buffer(d, cachestart, cacheend - cachestart)
1764
1764
1765 return self._readsegment(offset, length, df=df)
1765 return self._readsegment(offset, length, df=df)
1766
1766
1767 def _getsegmentforrevs(self, startrev, endrev, df=None):
1767 def _getsegmentforrevs(self, startrev, endrev, df=None):
1768 """Obtain a segment of raw data corresponding to a range of revisions.
1768 """Obtain a segment of raw data corresponding to a range of revisions.
1769
1769
1770 Accepts the start and end revisions and an optional already-open
1770 Accepts the start and end revisions and an optional already-open
1771 file handle to be used for reading. If the file handle is read, its
1771 file handle to be used for reading. If the file handle is read, its
1772 seek position will not be preserved.
1772 seek position will not be preserved.
1773
1773
1774 Requests for data may be satisfied by a cache.
1774 Requests for data may be satisfied by a cache.
1775
1775
1776 Returns a 2-tuple of (offset, data) for the requested range of
1776 Returns a 2-tuple of (offset, data) for the requested range of
1777 revisions. Offset is the integer offset from the beginning of the
1777 revisions. Offset is the integer offset from the beginning of the
1778 revlog and data is a str or buffer of the raw byte data.
1778 revlog and data is a str or buffer of the raw byte data.
1779
1779
1780 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1780 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1781 to determine where each revision's data begins and ends.
1781 to determine where each revision's data begins and ends.
1782 """
1782 """
1783 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1783 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1784 # (functions are expensive).
1784 # (functions are expensive).
1785 index = self.index
1785 index = self.index
1786 istart = index[startrev]
1786 istart = index[startrev]
1787 start = int(istart[0] >> 16)
1787 start = int(istart[0] >> 16)
1788 if startrev == endrev:
1788 if startrev == endrev:
1789 end = start + istart[1]
1789 end = start + istart[1]
1790 else:
1790 else:
1791 iend = index[endrev]
1791 iend = index[endrev]
1792 end = int(iend[0] >> 16) + iend[1]
1792 end = int(iend[0] >> 16) + iend[1]
1793
1793
1794 if self._inline:
1794 if self._inline:
1795 start += (startrev + 1) * self.index.entry_size
1795 start += (startrev + 1) * self.index.entry_size
1796 end += (endrev + 1) * self.index.entry_size
1796 end += (endrev + 1) * self.index.entry_size
1797 length = end - start
1797 length = end - start
1798
1798
1799 return start, self._getsegment(start, length, df=df)
1799 return start, self._getsegment(start, length, df=df)
1800
1800
1801 def _chunk(self, rev, df=None):
1801 def _chunk(self, rev, df=None):
1802 """Obtain a single decompressed chunk for a revision.
1802 """Obtain a single decompressed chunk for a revision.
1803
1803
1804 Accepts an integer revision and an optional already-open file handle
1804 Accepts an integer revision and an optional already-open file handle
1805 to be used for reading. If used, the seek position of the file will not
1805 to be used for reading. If used, the seek position of the file will not
1806 be preserved.
1806 be preserved.
1807
1807
1808 Returns a str holding uncompressed data for the requested revision.
1808 Returns a str holding uncompressed data for the requested revision.
1809 """
1809 """
1810 compression_mode = self.index[rev][10]
1810 compression_mode = self.index[rev][10]
1811 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1811 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1812 if compression_mode == COMP_MODE_PLAIN:
1812 if compression_mode == COMP_MODE_PLAIN:
1813 return data
1813 return data
1814 elif compression_mode == COMP_MODE_DEFAULT:
1814 elif compression_mode == COMP_MODE_DEFAULT:
1815 return self._decompressor(data)
1815 return self._decompressor(data)
1816 elif compression_mode == COMP_MODE_INLINE:
1816 elif compression_mode == COMP_MODE_INLINE:
1817 return self.decompress(data)
1817 return self.decompress(data)
1818 else:
1818 else:
1819 msg = 'unknown compression mode %d'
1819 msg = 'unknown compression mode %d'
1820 msg %= compression_mode
1820 msg %= compression_mode
1821 raise error.RevlogError(msg)
1821 raise error.RevlogError(msg)
1822
1822
    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        # hoist attribute lookups out of the hot loop below
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            # split the revisions into dense slices so each slice can be
            # read as one contiguous segment
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    # inline data is interleaved with index entries
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                # zero-copy view of this revision's bytes within the segment
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = 'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l
1890
1890
1891 def _chunkclear(self):
1891 def _chunkclear(self):
1892 """Clear the raw chunk cache."""
1892 """Clear the raw chunk cache."""
1893 self._chunkcache = (0, b'')
1893 self._chunkcache = (0, b'')
1894
1894
1895 def deltaparent(self, rev):
1895 def deltaparent(self, rev):
1896 """return deltaparent of the given revision"""
1896 """return deltaparent of the given revision"""
1897 base = self.index[rev][3]
1897 base = self.index[rev][3]
1898 if base == rev:
1898 if base == rev:
1899 return nullrev
1899 return nullrev
1900 elif self._generaldelta:
1900 elif self._generaldelta:
1901 return base
1901 return base
1902 else:
1902 else:
1903 return rev - 1
1903 return rev - 1
1904
1904
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            # without sparse-revlog, only full texts (delta against null)
            # count as snapshots
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]  # delta base revision
        if base == rev:
            # delta against itself means a stored full text
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            # a delta against a parent is a regular delta, not a snapshot
            return False
        # an intermediate snapshot is a delta against another snapshot
        return self.issnapshot(base)
1926
1926
1927 def snapshotdepth(self, rev):
1927 def snapshotdepth(self, rev):
1928 """number of snapshot in the chain before this one"""
1928 """number of snapshot in the chain before this one"""
1929 if not self.issnapshot(rev):
1929 if not self.issnapshot(rev):
1930 raise error.ProgrammingError(b'revision %d not a snapshot')
1930 raise error.ProgrammingError(b'revision %d not a snapshot')
1931 return len(self._deltachain(rev)[0]) - 1
1931 return len(self._deltachain(rev)[0]) - 1
1932
1932
1933 def revdiff(self, rev1, rev2):
1933 def revdiff(self, rev1, rev2):
1934 """return or calculate a delta between two revisions
1934 """return or calculate a delta between two revisions
1935
1935
1936 The delta calculated is in binary form and is intended to be written to
1936 The delta calculated is in binary form and is intended to be written to
1937 revlog data directly. So this function needs raw revision data.
1937 revlog data directly. So this function needs raw revision data.
1938 """
1938 """
1939 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1939 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1940 return bytes(self._chunk(rev2))
1940 return bytes(self._chunk(rev2))
1941
1941
1942 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1942 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1943
1943
1944 def _processflags(self, text, flags, operation, raw=False):
1944 def _processflags(self, text, flags, operation, raw=False):
1945 """deprecated entry point to access flag processors"""
1945 """deprecated entry point to access flag processors"""
1946 msg = b'_processflag(...) use the specialized variant'
1946 msg = b'_processflag(...) use the specialized variant'
1947 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1947 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1948 if raw:
1948 if raw:
1949 return text, flagutil.processflagsraw(self, text, flags)
1949 return text, flagutil.processflagsraw(self, text, flags)
1950 elif operation == b'read':
1950 elif operation == b'read':
1951 return flagutil.processflagsread(self, text, flags)
1951 return flagutil.processflagsread(self, text, flags)
1952 else: # write operation
1952 else: # write operation
1953 return flagutil.processflagswrite(self, text, flags)
1953 return flagutil.processflagswrite(self, text, flags)
1954
1954
1955 def revision(self, nodeorrev, _df=None, raw=False):
1955 def revision(self, nodeorrev, _df=None, raw=False):
1956 """return an uncompressed revision of a given node or revision
1956 """return an uncompressed revision of a given node or revision
1957 number.
1957 number.
1958
1958
1959 _df - an existing file handle to read from. (internal-only)
1959 _df - an existing file handle to read from. (internal-only)
1960 raw - an optional argument specifying if the revision data is to be
1960 raw - an optional argument specifying if the revision data is to be
1961 treated as raw data when applying flag transforms. 'raw' should be set
1961 treated as raw data when applying flag transforms. 'raw' should be set
1962 to True when generating changegroups or in debug commands.
1962 to True when generating changegroups or in debug commands.
1963 """
1963 """
1964 if raw:
1964 if raw:
1965 msg = (
1965 msg = (
1966 b'revlog.revision(..., raw=True) is deprecated, '
1966 b'revlog.revision(..., raw=True) is deprecated, '
1967 b'use revlog.rawdata(...)'
1967 b'use revlog.rawdata(...)'
1968 )
1968 )
1969 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1969 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1970 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1970 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1971
1971
1972 def sidedata(self, nodeorrev, _df=None):
1972 def sidedata(self, nodeorrev, _df=None):
1973 """a map of extra data related to the changeset but not part of the hash
1973 """a map of extra data related to the changeset but not part of the hash
1974
1974
1975 This function currently return a dictionary. However, more advanced
1975 This function currently return a dictionary. However, more advanced
1976 mapping object will likely be used in the future for a more
1976 mapping object will likely be used in the future for a more
1977 efficient/lazy code.
1977 efficient/lazy code.
1978 """
1978 """
1979 return self._revisiondata(nodeorrev, _df)[1]
1979 return self._revisiondata(nodeorrev, _df)[1]
1980
1980
    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        """Return a ``(text, sidedata)`` pair for ``nodeorrev``.

        ``nodeorrev`` is either an integer revision or a binary node.
        ``_df`` is an optional already-open file handle (internal only).
        When ``raw`` is True the stored text is returned without running
        the flag processors' read transforms.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.hassidedata:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flag for this revision
        # (usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            # raw access: only run the hash-validation part of the processors
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            # cache the now-validated rawtext for later reuse
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata
2031
2031
    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)

        ``validated`` is True only when the rawtext came straight from the
        cache and therefore has already had its hash checked; callers are
        expected to run checkhash() otherwise.

        _df - an existing file handle to read from. (internal-only)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                # exact hit: cached text was already validated
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        # Walk the delta chain, stopping early if the cached revision is an
        # ancestor we can apply the remaining deltas on top of.
        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        # Hint the chunk reader about how much decompressed data to expect,
        # so it can size its read-ahead (4x the stored raw size).
        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            # no cached base: the first chunk of the chain is the full text
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)
2075 def _sidedata(self, rev):
2075 def _sidedata(self, rev):
2076 """Return the sidedata for a given revision number."""
2076 """Return the sidedata for a given revision number."""
2077 index_entry = self.index[rev]
2077 index_entry = self.index[rev]
2078 sidedata_offset = index_entry[8]
2078 sidedata_offset = index_entry[8]
2079 sidedata_size = index_entry[9]
2079 sidedata_size = index_entry[9]
2080
2080
2081 if self._inline:
2081 if self._inline:
2082 sidedata_offset += self.index.entry_size * (1 + rev)
2082 sidedata_offset += self.index.entry_size * (1 + rev)
2083 if sidedata_size == 0:
2083 if sidedata_size == 0:
2084 return {}
2084 return {}
2085
2085
2086 comp_segment = self._getsegment(sidedata_offset, sidedata_size)
2086 comp_segment = self._getsegment(sidedata_offset, sidedata_size)
2087 comp = self.index[rev][11]
2087 comp = self.index[rev][11]
2088 if comp == COMP_MODE_PLAIN:
2088 if comp == COMP_MODE_PLAIN:
2089 segment = comp_segment
2089 segment = comp_segment
2090 elif comp == COMP_MODE_DEFAULT:
2090 elif comp == COMP_MODE_DEFAULT:
2091 segment = self._decompressor(comp_segment)
2091 segment = self._decompressor(comp_segment)
2092 elif comp == COMP_MODE_INLINE:
2092 elif comp == COMP_MODE_INLINE:
2093 segment = self.decompress(comp_segment)
2093 segment = self.decompress(comp_segment)
2094 else:
2094 else:
2095 msg = 'unknown compression mode %d'
2095 msg = 'unknown compression mode %d'
2096 msg %= comp
2096 msg %= comp
2097 raise error.RevlogError(msg)
2097 raise error.RevlogError(msg)
2098
2098
2099 sidedata = sidedatautil.deserialize_sidedata(segment)
2099 sidedata = sidedatautil.deserialize_sidedata(segment)
2100 return sidedata
2100 return sidedata
2101
2101
2102 def rawdata(self, nodeorrev, _df=None):
2102 def rawdata(self, nodeorrev, _df=None):
2103 """return an uncompressed raw data of a given node or revision number.
2103 """return an uncompressed raw data of a given node or revision number.
2104
2104
2105 _df - an existing file handle to read from. (internal-only)
2105 _df - an existing file handle to read from. (internal-only)
2106 """
2106 """
2107 return self._revisiondata(nodeorrev, _df, raw=True)[0]
2107 return self._revisiondata(nodeorrev, _df, raw=True)[0]
2108
2108
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.

        ``text`` is the raw revision content and ``p1``/``p2`` are the
        parent node ids; the result is the node id addressing this
        revision (SHA-1 based, via storageutil.hashrevisionsha1).
        """
        return storageutil.hashrevisionsha1(text, p1, p2)
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.

        Raises RevlogError when ``node`` does not match the computed hash
        of ``text`` with parents ``p1``/``p2`` (looked up from the index
        when not provided).  When the revlog is censorable and the text is
        a censor tombstone, CensoredNodeError is raised instead.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                # prefer reporting the numeric revision when we have one
                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            # A mismatch may actually be a censored revision: translate the
            # failure into the more specific error in that case.
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise
    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        # If a write is already in progress, its cached handles become
        # invalid once we rewrite the files, so flush and drop them first.
        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                # Copy every revision's data segment out of the inline index
                # into the new standalone data file, tracking the first
                # revision at/after the transaction offset for rollback.
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if troffset <= self.start(r) + r * self.index.entry_size:
                        trindex = r
                new_dfh.flush()

            # Rewrite the index without the inline flag (and without the
            # interleaved data).
            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()

                # There is a small transactional race here. If the rename of
                # the index fails, we should remove the datafile. It is more
                # important to ensure that the data file is not truncated
                # when the index is replaced as otherwise data is lost.
                tr.replace(self._datafile, self.start(trindex))

                # the temp file replace the real index when we exit the context
                # manager

            tr.replace(self._indexfile, trindex * self.index.entry_size)
            nodemaputil.setup_persistent_nodemap(tr, self)
            # cached chunks refer to the old inline layout; invalidate them
            self._chunkclear()

            if existing_handles:
                # switched from inline to conventional reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh)
                # ownership transferred to _writinghandles; don't close below
                new_dfh = None
        finally:
            if new_dfh is not None:
                new_dfh.close()
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.

        Base implementation does nothing; this exists as a hook for
        subclasses to override.
        """
    @contextlib.contextmanager
    def _writing(self, transaction):
        """Open the file handles needed to append revisions, for the
        duration of the context.

        Within the context ``self._writinghandles`` is a ``(ifh, dfh)``
        pair: the index file handle and the data file handle (``None`` for
        inline revlogs, whose data lives inside the index file).  If
        handles are already open (nested use), they are reused as-is.
        """
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            # handles opened by an enclosing _writing() context: reuse them
            yield
        else:
            ifh = dfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except IOError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        # no data file yet: create one
                        dfh = self._datafp(b"w+")
                    transaction.add(self._datafile, dsize)

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # exposing all file handle for writing.
                self._writinghandles = (ifh, dfh)
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                if dfh is not None:
                    dfh.close()
                # closing the index file last to avoid exposing referent to
                # potential unflushed data content.
                if ifh is not None:
                    ifh.close()
2275 def _write_docket(self, transaction):
2277 def _write_docket(self, transaction):
2276 """write the current docket on disk
2278 """write the current docket on disk
2277
2279
2278 Exist as a method to help changelog to implement transaction logic
2280 Exist as a method to help changelog to implement transaction logic
2279
2281
2280 We could also imagine using the same transaction logic for all revlog
2282 We could also imagine using the same transaction logic for all revlog
2281 since docket are cheap."""
2283 since docket are cheap."""
2282 self._docket.write(transaction)
2284 self._docket.write(transaction)
2283
2285
    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        sidedata - an optional mapping of sidedata to store with the revision
            (only valid when the revlog supports sidedata)

        Returns the revision number of the (possibly pre-existing) node.
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog who don't support them")
            )

        if flags:
            # flag processors may need the node to operate, so compute it
            # before transforming the text
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            # the node is already stored: nothing to add
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )
2362 def addrawrevision(
2364 def addrawrevision(
2363 self,
2365 self,
2364 rawtext,
2366 rawtext,
2365 transaction,
2367 transaction,
2366 link,
2368 link,
2367 p1,
2369 p1,
2368 p2,
2370 p2,
2369 node,
2371 node,
2370 flags,
2372 flags,
2371 cachedelta=None,
2373 cachedelta=None,
2372 deltacomputer=None,
2374 deltacomputer=None,
2373 sidedata=None,
2375 sidedata=None,
2374 ):
2376 ):
2375 """add a raw revision with known flags, node and parents
2377 """add a raw revision with known flags, node and parents
2376 useful when reusing a revision not stored in this revlog (ex: received
2378 useful when reusing a revision not stored in this revlog (ex: received
2377 over wire, or read from an external bundle).
2379 over wire, or read from an external bundle).
2378 """
2380 """
2379 with self._writing(transaction):
2381 with self._writing(transaction):
2380 return self._addrevision(
2382 return self._addrevision(
2381 node,
2383 node,
2382 rawtext,
2384 rawtext,
2383 transaction,
2385 transaction,
2384 link,
2386 link,
2385 p1,
2387 p1,
2386 p2,
2388 p2,
2387 flags,
2389 flags,
2388 cachedelta,
2390 cachedelta,
2389 deltacomputer=deltacomputer,
2391 deltacomputer=deltacomputer,
2390 sidedata=sidedata,
2392 sidedata=sidedata,
2391 )
2393 )
2392
2394
2393 def compress(self, data):
2395 def compress(self, data):
2394 """Generate a possibly-compressed representation of data."""
2396 """Generate a possibly-compressed representation of data."""
2395 if not data:
2397 if not data:
2396 return b'', data
2398 return b'', data
2397
2399
2398 compressed = self._compressor.compress(data)
2400 compressed = self._compressor.compress(data)
2399
2401
2400 if compressed:
2402 if compressed:
2401 # The revlog compressor added the header in the returned data.
2403 # The revlog compressor added the header in the returned data.
2402 return b'', compressed
2404 return b'', compressed
2403
2405
2404 if data[0:1] == b'\0':
2406 if data[0:1] == b'\0':
2405 return b'', data
2407 return b'', data
2406 return b'u', data
2408 return b'u', data
2407
2409
2408 def decompress(self, data):
2410 def decompress(self, data):
2409 """Decompress a revlog chunk.
2411 """Decompress a revlog chunk.
2410
2412
2411 The chunk is expected to begin with a header identifying the
2413 The chunk is expected to begin with a header identifying the
2412 format type so it can be routed to an appropriate decompressor.
2414 format type so it can be routed to an appropriate decompressor.
2413 """
2415 """
2414 if not data:
2416 if not data:
2415 return data
2417 return data
2416
2418
2417 # Revlogs are read much more frequently than they are written and many
2419 # Revlogs are read much more frequently than they are written and many
2418 # chunks only take microseconds to decompress, so performance is
2420 # chunks only take microseconds to decompress, so performance is
2419 # important here.
2421 # important here.
2420 #
2422 #
2421 # We can make a few assumptions about revlogs:
2423 # We can make a few assumptions about revlogs:
2422 #
2424 #
2423 # 1) the majority of chunks will be compressed (as opposed to inline
2425 # 1) the majority of chunks will be compressed (as opposed to inline
2424 # raw data).
2426 # raw data).
2425 # 2) decompressing *any* data will likely by at least 10x slower than
2427 # 2) decompressing *any* data will likely by at least 10x slower than
2426 # returning raw inline data.
2428 # returning raw inline data.
2427 # 3) we want to prioritize common and officially supported compression
2429 # 3) we want to prioritize common and officially supported compression
2428 # engines
2430 # engines
2429 #
2431 #
2430 # It follows that we want to optimize for "decompress compressed data
2432 # It follows that we want to optimize for "decompress compressed data
2431 # when encoded with common and officially supported compression engines"
2433 # when encoded with common and officially supported compression engines"
2432 # case over "raw data" and "data encoded by less common or non-official
2434 # case over "raw data" and "data encoded by less common or non-official
2433 # compression engines." That is why we have the inline lookup first
2435 # compression engines." That is why we have the inline lookup first
2434 # followed by the compengines lookup.
2436 # followed by the compengines lookup.
2435 #
2437 #
2436 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2438 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2437 # compressed chunks. And this matters for changelog and manifest reads.
2439 # compressed chunks. And this matters for changelog and manifest reads.
2438 t = data[0:1]
2440 t = data[0:1]
2439
2441
2440 if t == b'x':
2442 if t == b'x':
2441 try:
2443 try:
2442 return _zlibdecompress(data)
2444 return _zlibdecompress(data)
2443 except zlib.error as e:
2445 except zlib.error as e:
2444 raise error.RevlogError(
2446 raise error.RevlogError(
2445 _(b'revlog decompress error: %s')
2447 _(b'revlog decompress error: %s')
2446 % stringutil.forcebytestr(e)
2448 % stringutil.forcebytestr(e)
2447 )
2449 )
2448 # '\0' is more common than 'u' so it goes first.
2450 # '\0' is more common than 'u' so it goes first.
2449 elif t == b'\0':
2451 elif t == b'\0':
2450 return data
2452 return data
2451 elif t == b'u':
2453 elif t == b'u':
2452 return util.buffer(data, 1)
2454 return util.buffer(data, 1)
2453
2455
2454 compressor = self._get_decompressor(t)
2456 compressor = self._get_decompressor(t)
2455
2457
2456 return compressor.decompress(data)
2458 return compressor.decompress(data)
2457
2459
2458 def _addrevision(
2460 def _addrevision(
2459 self,
2461 self,
2460 node,
2462 node,
2461 rawtext,
2463 rawtext,
2462 transaction,
2464 transaction,
2463 link,
2465 link,
2464 p1,
2466 p1,
2465 p2,
2467 p2,
2466 flags,
2468 flags,
2467 cachedelta,
2469 cachedelta,
2468 alwayscache=False,
2470 alwayscache=False,
2469 deltacomputer=None,
2471 deltacomputer=None,
2470 sidedata=None,
2472 sidedata=None,
2471 ):
2473 ):
2472 """internal function to add revisions to the log
2474 """internal function to add revisions to the log
2473
2475
2474 see addrevision for argument descriptions.
2476 see addrevision for argument descriptions.
2475
2477
2476 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2478 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2477
2479
2478 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2480 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2479 be used.
2481 be used.
2480
2482
2481 invariants:
2483 invariants:
2482 - rawtext is optional (can be None); if not set, cachedelta must be set.
2484 - rawtext is optional (can be None); if not set, cachedelta must be set.
2483 if both are set, they must correspond to each other.
2485 if both are set, they must correspond to each other.
2484 """
2486 """
2485 if node == self.nullid:
2487 if node == self.nullid:
2486 raise error.RevlogError(
2488 raise error.RevlogError(
2487 _(b"%s: attempt to add null revision") % self.display_id
2489 _(b"%s: attempt to add null revision") % self.display_id
2488 )
2490 )
2489 if (
2491 if (
2490 node == self.nodeconstants.wdirid
2492 node == self.nodeconstants.wdirid
2491 or node in self.nodeconstants.wdirfilenodeids
2493 or node in self.nodeconstants.wdirfilenodeids
2492 ):
2494 ):
2493 raise error.RevlogError(
2495 raise error.RevlogError(
2494 _(b"%s: attempt to add wdir revision") % self.display_id
2496 _(b"%s: attempt to add wdir revision") % self.display_id
2495 )
2497 )
2496 if self._writinghandles is None:
2498 if self._writinghandles is None:
2497 msg = b'adding revision outside `revlog._writing` context'
2499 msg = b'adding revision outside `revlog._writing` context'
2498 raise error.ProgrammingError(msg)
2500 raise error.ProgrammingError(msg)
2499
2501
2500 if self._inline:
2502 if self._inline:
2501 fh = self._writinghandles[0]
2503 fh = self._writinghandles[0]
2502 else:
2504 else:
2503 fh = self._writinghandles[1]
2505 fh = self._writinghandles[1]
2504
2506
2505 btext = [rawtext]
2507 btext = [rawtext]
2506
2508
2507 curr = len(self)
2509 curr = len(self)
2508 prev = curr - 1
2510 prev = curr - 1
2509
2511
2510 offset = self._get_data_offset(prev)
2512 offset = self._get_data_offset(prev)
2511
2513
2512 if self._concurrencychecker:
2514 if self._concurrencychecker:
2513 ifh, dfh = self._writinghandles
2515 ifh, dfh = self._writinghandles
2514 if self._inline:
2516 if self._inline:
2515 # offset is "as if" it were in the .d file, so we need to add on
2517 # offset is "as if" it were in the .d file, so we need to add on
2516 # the size of the entry metadata.
2518 # the size of the entry metadata.
2517 self._concurrencychecker(
2519 self._concurrencychecker(
2518 ifh, self._indexfile, offset + curr * self.index.entry_size
2520 ifh, self._indexfile, offset + curr * self.index.entry_size
2519 )
2521 )
2520 else:
2522 else:
2521 # Entries in the .i are a consistent size.
2523 # Entries in the .i are a consistent size.
2522 self._concurrencychecker(
2524 self._concurrencychecker(
2523 ifh, self._indexfile, curr * self.index.entry_size
2525 ifh, self._indexfile, curr * self.index.entry_size
2524 )
2526 )
2525 self._concurrencychecker(dfh, self._datafile, offset)
2527 self._concurrencychecker(dfh, self._datafile, offset)
2526
2528
2527 p1r, p2r = self.rev(p1), self.rev(p2)
2529 p1r, p2r = self.rev(p1), self.rev(p2)
2528
2530
2529 # full versions are inserted when the needed deltas
2531 # full versions are inserted when the needed deltas
2530 # become comparable to the uncompressed text
2532 # become comparable to the uncompressed text
2531 if rawtext is None:
2533 if rawtext is None:
2532 # need rawtext size, before changed by flag processors, which is
2534 # need rawtext size, before changed by flag processors, which is
2533 # the non-raw size. use revlog explicitly to avoid filelog's extra
2535 # the non-raw size. use revlog explicitly to avoid filelog's extra
2534 # logic that might remove metadata size.
2536 # logic that might remove metadata size.
2535 textlen = mdiff.patchedsize(
2537 textlen = mdiff.patchedsize(
2536 revlog.size(self, cachedelta[0]), cachedelta[1]
2538 revlog.size(self, cachedelta[0]), cachedelta[1]
2537 )
2539 )
2538 else:
2540 else:
2539 textlen = len(rawtext)
2541 textlen = len(rawtext)
2540
2542
2541 if deltacomputer is None:
2543 if deltacomputer is None:
2542 deltacomputer = deltautil.deltacomputer(self)
2544 deltacomputer = deltautil.deltacomputer(self)
2543
2545
2544 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2546 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2545
2547
2546 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2548 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2547
2549
2548 compression_mode = COMP_MODE_INLINE
2550 compression_mode = COMP_MODE_INLINE
2549 if self._docket is not None:
2551 if self._docket is not None:
2550 h, d = deltainfo.data
2552 h, d = deltainfo.data
2551 if not h and not d:
2553 if not h and not d:
2552 # not data to store at all... declare them uncompressed
2554 # not data to store at all... declare them uncompressed
2553 compression_mode = COMP_MODE_PLAIN
2555 compression_mode = COMP_MODE_PLAIN
2554 elif not h:
2556 elif not h:
2555 t = d[0:1]
2557 t = d[0:1]
2556 if t == b'\0':
2558 if t == b'\0':
2557 compression_mode = COMP_MODE_PLAIN
2559 compression_mode = COMP_MODE_PLAIN
2558 elif t == self._docket.default_compression_header:
2560 elif t == self._docket.default_compression_header:
2559 compression_mode = COMP_MODE_DEFAULT
2561 compression_mode = COMP_MODE_DEFAULT
2560 elif h == b'u':
2562 elif h == b'u':
2561 # we have a more efficient way to declare uncompressed
2563 # we have a more efficient way to declare uncompressed
2562 h = b''
2564 h = b''
2563 compression_mode = COMP_MODE_PLAIN
2565 compression_mode = COMP_MODE_PLAIN
2564 deltainfo = deltautil.drop_u_compression(deltainfo)
2566 deltainfo = deltautil.drop_u_compression(deltainfo)
2565
2567
2566 sidedata_compression_mode = COMP_MODE_INLINE
2568 sidedata_compression_mode = COMP_MODE_INLINE
2567 if sidedata and self.hassidedata:
2569 if sidedata and self.hassidedata:
2568 sidedata_compression_mode = COMP_MODE_PLAIN
2570 sidedata_compression_mode = COMP_MODE_PLAIN
2569 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2571 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2570 sidedata_offset = offset + deltainfo.deltalen
2572 sidedata_offset = offset + deltainfo.deltalen
2571 h, comp_sidedata = self.compress(serialized_sidedata)
2573 h, comp_sidedata = self.compress(serialized_sidedata)
2572 if (
2574 if (
2573 h != b'u'
2575 h != b'u'
2574 and comp_sidedata[0:1] != b'\0'
2576 and comp_sidedata[0:1] != b'\0'
2575 and len(comp_sidedata) < len(serialized_sidedata)
2577 and len(comp_sidedata) < len(serialized_sidedata)
2576 ):
2578 ):
2577 assert not h
2579 assert not h
2578 if (
2580 if (
2579 comp_sidedata[0:1]
2581 comp_sidedata[0:1]
2580 == self._docket.default_compression_header
2582 == self._docket.default_compression_header
2581 ):
2583 ):
2582 sidedata_compression_mode = COMP_MODE_DEFAULT
2584 sidedata_compression_mode = COMP_MODE_DEFAULT
2583 serialized_sidedata = comp_sidedata
2585 serialized_sidedata = comp_sidedata
2584 else:
2586 else:
2585 sidedata_compression_mode = COMP_MODE_INLINE
2587 sidedata_compression_mode = COMP_MODE_INLINE
2586 serialized_sidedata = comp_sidedata
2588 serialized_sidedata = comp_sidedata
2587 else:
2589 else:
2588 serialized_sidedata = b""
2590 serialized_sidedata = b""
2589 # Don't store the offset if the sidedata is empty, that way
2591 # Don't store the offset if the sidedata is empty, that way
2590 # we can easily detect empty sidedata and they will be no different
2592 # we can easily detect empty sidedata and they will be no different
2591 # than ones we manually add.
2593 # than ones we manually add.
2592 sidedata_offset = 0
2594 sidedata_offset = 0
2593
2595
2594 e = (
2596 e = (
2595 offset_type(offset, flags),
2597 offset_type(offset, flags),
2596 deltainfo.deltalen,
2598 deltainfo.deltalen,
2597 textlen,
2599 textlen,
2598 deltainfo.base,
2600 deltainfo.base,
2599 link,
2601 link,
2600 p1r,
2602 p1r,
2601 p2r,
2603 p2r,
2602 node,
2604 node,
2603 sidedata_offset,
2605 sidedata_offset,
2604 len(serialized_sidedata),
2606 len(serialized_sidedata),
2605 compression_mode,
2607 compression_mode,
2606 sidedata_compression_mode,
2608 sidedata_compression_mode,
2607 )
2609 )
2608
2610
2609 self.index.append(e)
2611 self.index.append(e)
2610 entry = self.index.entry_binary(curr)
2612 entry = self.index.entry_binary(curr)
2611 if curr == 0 and self._docket is None:
2613 if curr == 0 and self._docket is None:
2612 header = self._format_flags | self._format_version
2614 header = self._format_flags | self._format_version
2613 header = self.index.pack_header(header)
2615 header = self.index.pack_header(header)
2614 entry = header + entry
2616 entry = header + entry
2615 self._writeentry(
2617 self._writeentry(
2616 transaction,
2618 transaction,
2617 entry,
2619 entry,
2618 deltainfo.data,
2620 deltainfo.data,
2619 link,
2621 link,
2620 offset,
2622 offset,
2621 serialized_sidedata,
2623 serialized_sidedata,
2622 )
2624 )
2623
2625
2624 rawtext = btext[0]
2626 rawtext = btext[0]
2625
2627
2626 if alwayscache and rawtext is None:
2628 if alwayscache and rawtext is None:
2627 rawtext = deltacomputer.buildtext(revinfo, fh)
2629 rawtext = deltacomputer.buildtext(revinfo, fh)
2628
2630
2629 if type(rawtext) == bytes: # only accept immutable objects
2631 if type(rawtext) == bytes: # only accept immutable objects
2630 self._revisioncache = (node, curr, rawtext)
2632 self._revisioncache = (node, curr, rawtext)
2631 self._chainbasecache[curr] = deltainfo.chainbase
2633 self._chainbasecache[curr] = deltainfo.chainbase
2632 return curr
2634 return curr
2633
2635
2634 def _get_data_offset(self, prev):
2636 def _get_data_offset(self, prev):
2635 """Returns the current offset in the (in-transaction) data file.
2637 """Returns the current offset in the (in-transaction) data file.
2636 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
2638 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
2637 file to store that information: since sidedata can be rewritten to the
2639 file to store that information: since sidedata can be rewritten to the
2638 end of the data file within a transaction, you can have cases where, for
2640 end of the data file within a transaction, you can have cases where, for
2639 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2641 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2640 to `n - 1`'s sidedata being written after `n`'s data.
2642 to `n - 1`'s sidedata being written after `n`'s data.
2641
2643
2642 TODO cache this in a docket file before getting out of experimental."""
2644 TODO cache this in a docket file before getting out of experimental."""
2643 if self._docket is None:
2645 if self._docket is None:
2644 return self.end(prev)
2646 return self.end(prev)
2645 else:
2647 else:
2646 return self._docket.data_end
2648 return self._docket.data_end
2647
2649
    def _writeentry(self, transaction, entry, data, link, offset, sidedata):
        """Write one revision to disk: index record, delta data and sidedata.

        ``entry`` is the packed index record, ``data`` is a ``(header, delta)``
        pair as produced by the compressor, ``offset`` is the position where
        the revision data starts (in the data file, or within the inline
        index). Must be called inside a ``_writing`` context so that
        ``self._writinghandles`` holds open (index, data) file handles.
        Raises ``error.ProgrammingError`` otherwise.
        """
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh = self._writinghandles
        # position each handle at the in-transaction end: tracked by the
        # docket when there is one, otherwise simply the end of the file
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            # split layout: delta (+ sidedata) go to the data file, the
            # fixed-size record goes to the index file
            transaction.add(self._datafile, offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            # inline layout: everything is interleaved in the index file
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            # may migrate the revlog out of inline mode once it grows too big
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # keep the docket's end offsets in sync with what was just written
            self._docket.index_end = self._writinghandles[0].tell()
            self._docket.data_end = self._writinghandles[1].tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)
2699
2701
    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.  ``duplicaterevisioncb`` is
        called likewise for nodes that were already present.

        Returns True if at least one revision (new or duplicate) was seen,
        False if ``deltas`` was empty.
        """

        # reject reentrant use: only one addgroup() may run at a time
        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    # both parents must already be known to this revlog
                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            # always clear the reentrancy guard, even on error
            self._adding_group = False
        return not empty
2808
2810
2809 def iscensored(self, rev):
2811 def iscensored(self, rev):
2810 """Check if a file revision is censored."""
2812 """Check if a file revision is censored."""
2811 if not self._censorable:
2813 if not self._censorable:
2812 return False
2814 return False
2813
2815
2814 return self.flags(rev) & REVIDX_ISCENSORED
2816 return self.flags(rev) & REVIDX_ISCENSORED
2815
2817
2816 def _peek_iscensored(self, baserev, delta):
2818 def _peek_iscensored(self, baserev, delta):
2817 """Quickly check if a delta produces a censored revision."""
2819 """Quickly check if a delta produces a censored revision."""
2818 if not self._censorable:
2820 if not self._censorable:
2819 return False
2821 return False
2820
2822
2821 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2823 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2822
2824
2823 def getstrippoint(self, minlink):
2825 def getstrippoint(self, minlink):
2824 """find the minimum rev that must be stripped to strip the linkrev
2826 """find the minimum rev that must be stripped to strip the linkrev
2825
2827
2826 Returns a tuple containing the minimum rev and a set of all revs that
2828 Returns a tuple containing the minimum rev and a set of all revs that
2827 have linkrevs that will be broken by this strip.
2829 have linkrevs that will be broken by this strip.
2828 """
2830 """
2829 return storageutil.resolvestripinfo(
2831 return storageutil.resolvestripinfo(
2830 minlink,
2832 minlink,
2831 len(self) - 1,
2833 len(self) - 1,
2832 self.headrevs(),
2834 self.headrevs(),
2833 self.linkrev,
2835 self.linkrev,
2834 self.parentrevs,
2836 self.parentrevs,
2835 )
2837 )
2836
2838
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            # empty revlog: nothing to strip
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            # no revision has a linkrev >= minlink: nothing to strip
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            # inline revlogs interleave data with index records, so the index
            # truncation point accounts for both
            end = data_end + (rev * self.index.entry_size)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could, leverage the docket while stripping. However it is
            # not powerfull enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]
2880
2882
2881 def checksize(self):
2883 def checksize(self):
2882 """Check size of index and data files
2884 """Check size of index and data files
2883
2885
2884 return a (dd, di) tuple.
2886 return a (dd, di) tuple.
2885 - dd: extra bytes for the "data" file
2887 - dd: extra bytes for the "data" file
2886 - di: extra bytes for the "index" file
2888 - di: extra bytes for the "index" file
2887
2889
2888 A healthy revlog will return (0, 0).
2890 A healthy revlog will return (0, 0).
2889 """
2891 """
2890 expected = 0
2892 expected = 0
2891 if len(self):
2893 if len(self):
2892 expected = max(0, self.end(len(self) - 1))
2894 expected = max(0, self.end(len(self) - 1))
2893
2895
2894 try:
2896 try:
2895 with self._datafp() as f:
2897 with self._datafp() as f:
2896 f.seek(0, io.SEEK_END)
2898 f.seek(0, io.SEEK_END)
2897 actual = f.tell()
2899 actual = f.tell()
2898 dd = actual - expected
2900 dd = actual - expected
2899 except IOError as inst:
2901 except IOError as inst:
2900 if inst.errno != errno.ENOENT:
2902 if inst.errno != errno.ENOENT:
2901 raise
2903 raise
2902 dd = 0
2904 dd = 0
2903
2905
2904 try:
2906 try:
2905 f = self.opener(self._indexfile)
2907 f = self.opener(self._indexfile)
2906 f.seek(0, io.SEEK_END)
2908 f.seek(0, io.SEEK_END)
2907 actual = f.tell()
2909 actual = f.tell()
2908 f.close()
2910 f.close()
2909 s = self.index.entry_size
2911 s = self.index.entry_size
2910 i = max(0, actual // s)
2912 i = max(0, actual // s)
2911 di = actual - (i * s)
2913 di = actual - (i * s)
2912 if self._inline:
2914 if self._inline:
2913 databytes = 0
2915 databytes = 0
2914 for r in self:
2916 for r in self:
2915 databytes += max(0, self.length(r))
2917 databytes += max(0, self.length(r))
2916 dd = 0
2918 dd = 0
2917 di = actual - len(self) * s - databytes
2919 di = actual - len(self) * s - databytes
2918 except IOError as inst:
2920 except IOError as inst:
2919 if inst.errno != errno.ENOENT:
2921 if inst.errno != errno.ENOENT:
2920 raise
2922 raise
2921 di = 0
2923 di = 0
2922
2924
2923 return (dd, di)
2925 return (dd, di)
2924
2926
2925 def files(self):
2927 def files(self):
2926 res = [self._indexfile]
2928 res = [self._indexfile]
2927 if not self._inline:
2929 if not self._inline:
2928 res.append(self._datafile)
2930 res.append(self._datafile)
2929 return res
2931 return res
2930
2932
2931 def emitrevisions(
2933 def emitrevisions(
2932 self,
2934 self,
2933 nodes,
2935 nodes,
2934 nodesorder=None,
2936 nodesorder=None,
2935 revisiondata=False,
2937 revisiondata=False,
2936 assumehaveparentrevisions=False,
2938 assumehaveparentrevisions=False,
2937 deltamode=repository.CG_DELTAMODE_STD,
2939 deltamode=repository.CG_DELTAMODE_STD,
2938 sidedata_helpers=None,
2940 sidedata_helpers=None,
2939 ):
2941 ):
2940 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2942 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2941 raise error.ProgrammingError(
2943 raise error.ProgrammingError(
2942 b'unhandled value for nodesorder: %s' % nodesorder
2944 b'unhandled value for nodesorder: %s' % nodesorder
2943 )
2945 )
2944
2946
2945 if nodesorder is None and not self._generaldelta:
2947 if nodesorder is None and not self._generaldelta:
2946 nodesorder = b'storage'
2948 nodesorder = b'storage'
2947
2949
2948 if (
2950 if (
2949 not self._storedeltachains
2951 not self._storedeltachains
2950 and deltamode != repository.CG_DELTAMODE_PREV
2952 and deltamode != repository.CG_DELTAMODE_PREV
2951 ):
2953 ):
2952 deltamode = repository.CG_DELTAMODE_FULL
2954 deltamode = repository.CG_DELTAMODE_FULL
2953
2955
2954 return storageutil.emitrevisions(
2956 return storageutil.emitrevisions(
2955 self,
2957 self,
2956 nodes,
2958 nodes,
2957 nodesorder,
2959 nodesorder,
2958 revlogrevisiondelta,
2960 revlogrevisiondelta,
2959 deltaparentfn=self.deltaparent,
2961 deltaparentfn=self.deltaparent,
2960 candeltafn=self.candelta,
2962 candeltafn=self.candelta,
2961 rawsizefn=self.rawsize,
2963 rawsizefn=self.rawsize,
2962 revdifffn=self.revdiff,
2964 revdifffn=self.revdiff,
2963 flagsfn=self.flags,
2965 flagsfn=self.flags,
2964 deltamode=deltamode,
2966 deltamode=deltamode,
2965 revisiondata=revisiondata,
2967 revisiondata=revisiondata,
2966 assumehaveparentrevisions=assumehaveparentrevisions,
2968 assumehaveparentrevisions=assumehaveparentrevisions,
2967 sidedata_helpers=sidedata_helpers,
2969 sidedata_helpers=sidedata_helpers,
2968 )
2970 )
2969
2971
    # ``deltareuse`` policies accepted by ``clone()``:
    # always reuse stored deltas when possible (fastest)
    DELTAREUSEALWAYS = b'always'
    # reuse a delta only when the destination would pick the same revisions
    DELTAREUSESAMEREVS = b'samerevs'
    # never reuse deltas; recompute everything (slowest)
    DELTAREUSENEVER = b'never'

    # re-add revisions as if they were new full texts; slower than
    # DELTAREUSEALWAYS but lets extra mechanisms (e.g. largefile detection)
    # kick in
    DELTAREUSEFULLADD = b'fulladd'

    # the set of all valid ``deltareuse`` values
    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2977
2979
    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument control how deltas from the existing revlog
        are preserved in the destination revlog. The argument can have the
        following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revision will be re-added as if their were new content. This is
          slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
          eg: large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. By default, the current default is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase controls whether to reuse a cached delta,
        # if possible.
        # Save the destination's current settings so they can be restored
        # after the clone, whatever happens.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            # translate the requested policy into the destination's
            # delta-reuse knobs (DELTAREUSEFULLADD leaves them untouched)
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            # restore the destination's original configuration even if the
            # clone failed part-way through
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
3076
3078
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing

        Walks every revision of this revlog and re-adds it to ``destrevlog``.
        With ``deltareuse == DELTAREUSEFULLADD`` the full text goes through
        ``destrevlog.addrevision``; otherwise the lower-level
        ``destrevlog._addrevision`` is used, possibly reusing the delta
        already stored in this revlog.  ``addrevisioncb``, when not None, is
        invoked as ``addrevisioncb(self, rev, node)`` after each revision.
        """
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            # Index entry fields used here: [0] offset/flags, [4] linkrev,
            # [5]/[6] parent revs, [7] node.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text, sidedata = self._revisiondata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    # new_flags is (flags-to-add, flags-to-remove)
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    # Only fetch the full text when we cannot reuse a delta.
                    rawtext, sidedata = self._revisiondata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)
3159
3161
    def censorrevision(self, tr, censornode, tombstone=b''):
        """replace the content of ``censornode`` with ``tombstone`` data

        Not supported on version 0 revlogs.  The tombstone (wrapped in
        censored-metadata packing) must fit within the censored revision's
        raw size.  The whole revlog is rewritten into a temporary revlog and
        the files are swapped on disk at the end.
        """
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.
        #
        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(
            self.opener,
            target=self.target,
            radix=self.radix,
            postfix=b'tmpcensored',
            censorable=True,
        )
        # Mirror this revlog's format so the rewritten copy is equivalent.
        newrl._format_version = self._format_version
        newrl._format_flags = self._format_flags
        newrl._generaldelta = self._generaldelta
        newrl._parse_index = self._parse_index

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                # Store the tombstone in place of the censored revision,
                # flagged as censored.
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                # Already-censored revisions are copied as raw chunks so the
                # tombstone bytes are preserved verbatim.
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        # Back up the originals before renaming the rewritten files over them.
        tr.addbackup(self._indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self._datafile, location=b'store')

        self.opener.rename(newrl._indexfile, self._indexfile)
        if not self._inline:
            self.opener.rename(newrl._datafile, self._datafile)

        # Drop cached state and reload from the rewritten files.
        self.clearcaches()
        self._loadindex()
3247
3249
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.

        ``state`` is a mutable dict shared with the caller; this method
        reads ``expectedversion``, ``skipflags`` and ``erroroncensored``
        and (re)initializes the ``skipread`` and ``safe_renamed`` sets.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common  | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1      | L1     | L1    | L1
            # size()       | L1      | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2      | L2     | L2    | L2
            # len(text)    | L2      | L2     | L2    | L3
            # len(read())  | L2      | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                # Restrict flag verification to the flags the caller asked
                # to skip that are actually set on this revision.
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                # Censored data is only a problem when the caller opted in
                # via 'erroroncensored'; either way skip further reads.
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                # Report any unpacking failure as a problem rather than
                # aborting the whole verification run.
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)
3352
3354
3353 def storageinfo(
3355 def storageinfo(
3354 self,
3356 self,
3355 exclusivefiles=False,
3357 exclusivefiles=False,
3356 sharedfiles=False,
3358 sharedfiles=False,
3357 revisionscount=False,
3359 revisionscount=False,
3358 trackedsize=False,
3360 trackedsize=False,
3359 storedsize=False,
3361 storedsize=False,
3360 ):
3362 ):
3361 d = {}
3363 d = {}
3362
3364
3363 if exclusivefiles:
3365 if exclusivefiles:
3364 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3366 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3365 if not self._inline:
3367 if not self._inline:
3366 d[b'exclusivefiles'].append((self.opener, self._datafile))
3368 d[b'exclusivefiles'].append((self.opener, self._datafile))
3367
3369
3368 if sharedfiles:
3370 if sharedfiles:
3369 d[b'sharedfiles'] = []
3371 d[b'sharedfiles'] = []
3370
3372
3371 if revisionscount:
3373 if revisionscount:
3372 d[b'revisionscount'] = len(self)
3374 d[b'revisionscount'] = len(self)
3373
3375
3374 if trackedsize:
3376 if trackedsize:
3375 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3377 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3376
3378
3377 if storedsize:
3379 if storedsize:
3378 d[b'storedsize'] = sum(
3380 d[b'storedsize'] = sum(
3379 self.opener.stat(path).st_size for path in self.files()
3381 self.opener.stat(path).st_size for path in self.files()
3380 )
3382 )
3381
3383
3382 return d
3384 return d
3383
3385
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        """regenerate sidedata for revisions ``startrev`` .. ``endrev``

        Runs the sidedata ``helpers`` for each revision in the inclusive
        range, appends the newly serialized sidedata at the end of the data
        file and rewrites the matching index entries in place.  Rewriting a
        revision that already carries sidedata is not supported and aborts.
        """
        if not self.hassidedata:
            return
        # revlog formats with sidedata support does not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh = self._writinghandles
            # Position the data handle at the current end of data: the
            # docket tracks it explicitly when present, otherwise seek to
            # the physical end of file.
            if self._docket is not None:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
            else:
                dfh.seek(0, os.SEEK_END)

            current_offset = dfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                # Try to compress the sidedata; keep the compressed form
                # only when it is a real win and its first byte matches the
                # docket's default compression header (COMP_MODE_DEFAULT),
                # otherwise store it inline-tagged (COMP_MODE_INLINE).
                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                # entry[8]/entry[9] are the existing sidedata offset/length.
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have move the file cursors around
                dfh.seek(current_offset, os.SEEK_SET)
                dfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            if self._docket is not None:
                self._docket.data_end = dfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    # Without a docket, the format header is folded into the
                    # first index entry and must be re-emitted.
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
General Comments 0
You need to be logged in to leave comments. Login now