##// END OF EJS Templates
revlog: use `self.sidedata` directly in `revlog.clone`...
marmoute -
r48176:fec306b0 default
parent child Browse files
Show More
@@ -1,3477 +1,3479 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import contextlib
18 import contextlib
19 import errno
19 import errno
20 import io
20 import io
21 import os
21 import os
22 import struct
22 import struct
23 import zlib
23 import zlib
24
24
25 # import stuff from node for others to import from revlog
25 # import stuff from node for others to import from revlog
26 from .node import (
26 from .node import (
27 bin,
27 bin,
28 hex,
28 hex,
29 nullrev,
29 nullrev,
30 sha1nodeconstants,
30 sha1nodeconstants,
31 short,
31 short,
32 wdirrev,
32 wdirrev,
33 )
33 )
34 from .i18n import _
34 from .i18n import _
35 from .pycompat import getattr
35 from .pycompat import getattr
36 from .revlogutils.constants import (
36 from .revlogutils.constants import (
37 ALL_KINDS,
37 ALL_KINDS,
38 CHANGELOGV2,
38 CHANGELOGV2,
39 COMP_MODE_DEFAULT,
39 COMP_MODE_DEFAULT,
40 COMP_MODE_INLINE,
40 COMP_MODE_INLINE,
41 COMP_MODE_PLAIN,
41 COMP_MODE_PLAIN,
42 FEATURES_BY_VERSION,
42 FEATURES_BY_VERSION,
43 FLAG_GENERALDELTA,
43 FLAG_GENERALDELTA,
44 FLAG_INLINE_DATA,
44 FLAG_INLINE_DATA,
45 INDEX_HEADER,
45 INDEX_HEADER,
46 KIND_CHANGELOG,
46 KIND_CHANGELOG,
47 REVLOGV0,
47 REVLOGV0,
48 REVLOGV1,
48 REVLOGV1,
49 REVLOGV1_FLAGS,
49 REVLOGV1_FLAGS,
50 REVLOGV2,
50 REVLOGV2,
51 REVLOGV2_FLAGS,
51 REVLOGV2_FLAGS,
52 REVLOG_DEFAULT_FLAGS,
52 REVLOG_DEFAULT_FLAGS,
53 REVLOG_DEFAULT_FORMAT,
53 REVLOG_DEFAULT_FORMAT,
54 REVLOG_DEFAULT_VERSION,
54 REVLOG_DEFAULT_VERSION,
55 SUPPORTED_FLAGS,
55 SUPPORTED_FLAGS,
56 )
56 )
57 from .revlogutils.flagutil import (
57 from .revlogutils.flagutil import (
58 REVIDX_DEFAULT_FLAGS,
58 REVIDX_DEFAULT_FLAGS,
59 REVIDX_ELLIPSIS,
59 REVIDX_ELLIPSIS,
60 REVIDX_EXTSTORED,
60 REVIDX_EXTSTORED,
61 REVIDX_FLAGS_ORDER,
61 REVIDX_FLAGS_ORDER,
62 REVIDX_HASCOPIESINFO,
62 REVIDX_HASCOPIESINFO,
63 REVIDX_ISCENSORED,
63 REVIDX_ISCENSORED,
64 REVIDX_RAWTEXT_CHANGING_FLAGS,
64 REVIDX_RAWTEXT_CHANGING_FLAGS,
65 )
65 )
66 from .thirdparty import attr
66 from .thirdparty import attr
67 from . import (
67 from . import (
68 ancestor,
68 ancestor,
69 dagop,
69 dagop,
70 error,
70 error,
71 mdiff,
71 mdiff,
72 policy,
72 policy,
73 pycompat,
73 pycompat,
74 templatefilters,
74 templatefilters,
75 util,
75 util,
76 )
76 )
77 from .interfaces import (
77 from .interfaces import (
78 repository,
78 repository,
79 util as interfaceutil,
79 util as interfaceutil,
80 )
80 )
81 from .revlogutils import (
81 from .revlogutils import (
82 deltas as deltautil,
82 deltas as deltautil,
83 docket as docketutil,
83 docket as docketutil,
84 flagutil,
84 flagutil,
85 nodemap as nodemaputil,
85 nodemap as nodemaputil,
86 revlogv0,
86 revlogv0,
87 sidedata as sidedatautil,
87 sidedata as sidedatautil,
88 )
88 )
89 from .utils import (
89 from .utils import (
90 storageutil,
90 storageutil,
91 stringutil,
91 stringutil,
92 )
92 )
93
93
94 # blank usage of all the names to prevent pyflakes warnings
94 # blank usage of all the names to prevent pyflakes warnings
95 # We need these names available in the module for extensions.
95 # We need these names available in the module for extensions.
96
96
97 REVLOGV0
97 REVLOGV0
98 REVLOGV1
98 REVLOGV1
99 REVLOGV2
99 REVLOGV2
100 FLAG_INLINE_DATA
100 FLAG_INLINE_DATA
101 FLAG_GENERALDELTA
101 FLAG_GENERALDELTA
102 REVLOG_DEFAULT_FLAGS
102 REVLOG_DEFAULT_FLAGS
103 REVLOG_DEFAULT_FORMAT
103 REVLOG_DEFAULT_FORMAT
104 REVLOG_DEFAULT_VERSION
104 REVLOG_DEFAULT_VERSION
105 REVLOGV1_FLAGS
105 REVLOGV1_FLAGS
106 REVLOGV2_FLAGS
106 REVLOGV2_FLAGS
107 REVIDX_ISCENSORED
107 REVIDX_ISCENSORED
108 REVIDX_ELLIPSIS
108 REVIDX_ELLIPSIS
109 REVIDX_HASCOPIESINFO
109 REVIDX_HASCOPIESINFO
110 REVIDX_EXTSTORED
110 REVIDX_EXTSTORED
111 REVIDX_DEFAULT_FLAGS
111 REVIDX_DEFAULT_FLAGS
112 REVIDX_FLAGS_ORDER
112 REVIDX_FLAGS_ORDER
113 REVIDX_RAWTEXT_CHANGING_FLAGS
113 REVIDX_RAWTEXT_CHANGING_FLAGS
114
114
# Implementation modules resolved through the `policy` helper.
parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Module-level alias of zlib.decompress, kept for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data (128 KiB)
_maxinline = 128 * 1024
# 1 MiB
_chunksize = 1024 * 1024
126
126
127 # Flag processors for REVIDX_ELLIPSIS.
127 # Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    """Read-side flag processor used for REVIDX_ELLIPSIS.

    ``text`` is handed back unchanged, paired with ``False``. ``rl`` is
    accepted but not used.
    """
    result = (text, False)
    return result
130
130
131
131
def ellipsiswriteprocessor(rl, text):
    """Write-side flag processor used for REVIDX_ELLIPSIS.

    ``text`` is handed back unchanged, paired with ``False``. ``rl`` is
    accepted but not used.
    """
    result = (text, False)
    return result
134
134
135
135
def ellipsisrawprocessor(rl, text):
    """Raw flag processor used for REVIDX_ELLIPSIS.

    Always answers ``False``; neither ``rl`` nor ``text`` is inspected.
    """
    answer = False
    return answer
138
138
139
139
# The three REVIDX_ELLIPSIS processors bundled in (read, write, raw) order.
ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)
145
145
146
146
def offset_type(offset, type):
    """Combine a data offset and revision flags into a single integer.

    The offset is shifted up by 16 bits and the flags are OR-ed into the
    result.

    Raises ``ValueError`` when ``type`` carries bits that are not part of
    ``flagutil.REVIDX_KNOWN_FLAGS``.
    """
    unknown_bits = type & ~flagutil.REVIDX_KNOWN_FLAGS
    if unknown_bits:
        raise ValueError(b'unknown revlog index flags')
    shifted_offset = int(offset) << 16
    return int(shifted_offset | type)
151
151
152
152
153 def _verify_revision(rl, skipflags, state, node):
153 def _verify_revision(rl, skipflags, state, node):
154 """Verify the integrity of the given revlog ``node`` while providing a hook
154 """Verify the integrity of the given revlog ``node`` while providing a hook
155 point for extensions to influence the operation."""
155 point for extensions to influence the operation."""
156 if skipflags:
156 if skipflags:
157 state[b'skipread'].add(node)
157 state[b'skipread'].add(node)
158 else:
158 else:
159 # Side-effect: read content and verify hash.
159 # Side-effect: read content and verify hash.
160 rl.revision(node)
160 rl.revision(node)
161
161
162
162
# Whether a fast persistent-nodemap implementation is available.
#
# The "pure" python implementation is also counted as "fast": people running
# pure don't really have performance considerations (they already live with a
# wheelbarrow of other sources of slowness).
HAS_FAST_PERSISTENT_NODEMAP = (
    rustrevlog is not None
    or util.safehasattr(parsers, 'BaseIndexObject')
)
171
171
172
172
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Everything needed to build the fulltext of one revision.

    Fields, in constructor order:

    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    textlen:    NOTE(review): presumably the length of the revision text;
                not described by the original documentation -- confirm
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    At least one of ``btext[0]`` and ``cachedelta`` must be set.
    """

    # Field order defines the positional constructor signature; keep it.
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()
192
192
193
193
@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    """Slotted attribute container declared to implement
    ``repository.irevisiondelta``.

    All fields are required at construction time except ``linknode``,
    which defaults to ``None``.
    """

    # Field order defines the positional constructor signature; keep it.
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)
208
208
209
209
@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    """Frozen attribute container declared to implement
    ``repository.iverifyproblem``.

    Every field is optional and defaults to ``None``.
    """

    # Field order defines the positional constructor signature; keep it.
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)
216
216
217
217
def parse_index_v1(data, inline):
    """Parse index ``data`` and return an ``(index, cache)`` pair.

    The work is delegated to ``parsers.parse_index2``.
    """
    parsed_index, chunk_cache = parsers.parse_index2(data, inline)
    return parsed_index, chunk_cache
222
222
223
223
def parse_index_v2(data, inline):
    """Parse revlog-v2 index ``data`` and return an ``(index, cache)`` pair.

    The work is delegated to ``parsers.parse_index2`` with
    ``revlogv2=True``.
    """
    parsed_index, chunk_cache = parsers.parse_index2(
        data, inline, revlogv2=True
    )
    return parsed_index, chunk_cache
228
228
229
229
def parse_index_cl_v2(data, inline):
    """Parse changelog-v2 index ``data`` and return an ``(index, cache)`` pair.

    ``inline`` must be falsy. Parsing is done by the function of the same
    name in ``.pure.parsers``, imported locally.
    """
    assert not inline
    from .pure.parsers import parse_index_cl_v2 as _pure_parse_index_cl_v2

    parsed_index, chunk_cache = _pure_parse_index_cl_v2(data)
    return parsed_index, chunk_cache
237
237
238
238
# `parse_index_v1_nodemap` is only a real function when the `parsers` module
# exposes `parse_index_devel_nodemap`; otherwise the name is bound to None.
if not util.safehasattr(parsers, 'parse_index_devel_nodemap'):
    parse_index_v1_nodemap = None
else:

    def parse_index_v1_nodemap(data, inline):
        """Parse index ``data`` through
        ``parsers.parse_index_devel_nodemap`` and return ``(index, cache)``.
        """
        parsed_index, chunk_cache = parsers.parse_index_devel_nodemap(
            data, inline
        )
        return parsed_index, chunk_cache
248
248
249
249
def parse_index_v1_mixed(data, inline):
    """Parse index ``data`` with ``parse_index_v1`` and wrap the resulting
    index in ``rustrevlog.MixedIndex``.

    Returns a ``(wrapped_index, cache)`` pair.
    """
    plain_index, chunk_cache = parse_index_v1(data, inline)
    wrapped_index = rustrevlog.MixedIndex(plain_index)
    return wrapped_index, chunk_cache
253
253
254
254
255 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
255 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
256 # signed integer)
256 # signed integer)
257 _maxentrysize = 0x7FFFFFFF
257 _maxentrysize = 0x7FFFFFFF
258
258
259
259
260 class revlog(object):
260 class revlog(object):
261 """
261 """
262 the underlying revision storage object
262 the underlying revision storage object
263
263
264 A revlog consists of two parts, an index and the revision data.
264 A revlog consists of two parts, an index and the revision data.
265
265
266 The index is a file with a fixed record size containing
266 The index is a file with a fixed record size containing
267 information on each revision, including its nodeid (hash), the
267 information on each revision, including its nodeid (hash), the
268 nodeids of its parents, the position and offset of its data within
268 nodeids of its parents, the position and offset of its data within
269 the data file, and the revision it's based on. Finally, each entry
269 the data file, and the revision it's based on. Finally, each entry
270 contains a linkrev entry that can serve as a pointer to external
270 contains a linkrev entry that can serve as a pointer to external
271 data.
271 data.
272
272
273 The revision data itself is a linear collection of data chunks.
273 The revision data itself is a linear collection of data chunks.
274 Each chunk represents a revision and is usually represented as a
274 Each chunk represents a revision and is usually represented as a
275 delta against the previous chunk. To bound lookup time, runs of
275 delta against the previous chunk. To bound lookup time, runs of
276 deltas are limited to about 2 times the length of the original
276 deltas are limited to about 2 times the length of the original
277 version data. This makes retrieval of a version proportional to
277 version data. This makes retrieval of a version proportional to
278 its size, or O(1) relative to the number of revisions.
278 its size, or O(1) relative to the number of revisions.
279
279
280 Both pieces of the revlog are written to in an append-only
280 Both pieces of the revlog are written to in an append-only
281 fashion, which means we never need to rewrite a file to insert or
281 fashion, which means we never need to rewrite a file to insert or
282 remove data, and can use some simple techniques to avoid the need
282 remove data, and can use some simple techniques to avoid the need
283 for locking while reading.
283 for locking while reading.
284
284
285 If checkambig, indexfile is opened with checkambig=True at
285 If checkambig, indexfile is opened with checkambig=True at
286 writing, to avoid file stat ambiguity.
286 writing, to avoid file stat ambiguity.
287
287
288 If mmaplargeindex is True, and an mmapindexthreshold is set, the
288 If mmaplargeindex is True, and an mmapindexthreshold is set, the
289 index will be mmapped rather than read if it is larger than the
289 index will be mmapped rather than read if it is larger than the
290 configured threshold.
290 configured threshold.
291
291
292 If censorable is True, the revlog can have censored revisions.
292 If censorable is True, the revlog can have censored revisions.
293
293
294 If `upperboundcomp` is not None, this is the expected maximal gain from
294 If `upperboundcomp` is not None, this is the expected maximal gain from
295 compression for the data content.
295 compression for the data content.
296
296
297 `concurrencychecker` is an optional function that receives 3 arguments: a
297 `concurrencychecker` is an optional function that receives 3 arguments: a
298 file handle, a filename, and an expected position. It should check whether
298 file handle, a filename, and an expected position. It should check whether
299 the current position in the file handle is valid, and log/warn/fail (by
299 the current position in the file handle is valid, and log/warn/fail (by
300 raising).
300 raising).
301
301
302
302
303 Internal details
303 Internal details
304 ----------------
304 ----------------
305
305
306 A large part of the revlog logic deals with revisions' "index entries", tuple
306 A large part of the revlog logic deals with revisions' "index entries", tuple
307 objects that contain the same "items" whatever the revlog version.
307 objects that contain the same "items" whatever the revlog version.
308 Different versions will have different ways of storing these items (sometimes
308 Different versions will have different ways of storing these items (sometimes
309 not having them at all), but the tuple will always be the same. New fields
309 not having them at all), but the tuple will always be the same. New fields
310 are usually added at the end to avoid breaking existing code that relies
310 are usually added at the end to avoid breaking existing code that relies
311 on the existing order. The fields are defined as follows:
311 on the existing order. The fields are defined as follows:
312
312
313 [0] offset:
313 [0] offset:
314 The byte index of the start of revision data chunk.
314 The byte index of the start of revision data chunk.
315 That value is shifted up by 16 bits. use "offset = field >> 16" to
315 That value is shifted up by 16 bits. use "offset = field >> 16" to
316 retrieve it.
316 retrieve it.
317
317
318 flags:
318 flags:
319 A flag field that carries special information or changes the behavior
319 A flag field that carries special information or changes the behavior
320 of the revision. (see `REVIDX_*` constants for details)
320 of the revision. (see `REVIDX_*` constants for details)
321 The flag field only occupies the first 16 bits of this field,
321 The flag field only occupies the first 16 bits of this field,
322 use "flags = field & 0xFFFF" to retrieve the value.
322 use "flags = field & 0xFFFF" to retrieve the value.
323
323
324 [1] compressed length:
324 [1] compressed length:
325 The size, in bytes, of the chunk on disk
325 The size, in bytes, of the chunk on disk
326
326
327 [2] uncompressed length:
327 [2] uncompressed length:
328 The size, in bytes, of the full revision once reconstructed.
328 The size, in bytes, of the full revision once reconstructed.
329
329
330 [3] base rev:
330 [3] base rev:
331 Either the base of the revision delta chain (without general
331 Either the base of the revision delta chain (without general
332 delta), or the base of the delta (stored in the data chunk)
332 delta), or the base of the delta (stored in the data chunk)
333 with general delta.
333 with general delta.
334
334
335 [4] link rev:
335 [4] link rev:
336 Changelog revision number of the changeset introducing this
336 Changelog revision number of the changeset introducing this
337 revision.
337 revision.
338
338
339 [5] parent 1 rev:
339 [5] parent 1 rev:
340 Revision number of the first parent
340 Revision number of the first parent
341
341
342 [6] parent 2 rev:
342 [6] parent 2 rev:
343 Revision number of the second parent
343 Revision number of the second parent
344
344
345 [7] node id:
345 [7] node id:
346 The node id of the current revision
346 The node id of the current revision
347
347
348 [8] sidedata offset:
348 [8] sidedata offset:
349 The byte index of the start of the revision's side-data chunk.
349 The byte index of the start of the revision's side-data chunk.
350
350
351 [9] sidedata chunk length:
351 [9] sidedata chunk length:
352 The size, in bytes, of the revision's side-data chunk.
352 The size, in bytes, of the revision's side-data chunk.
353
353
354 [10] data compression mode:
354 [10] data compression mode:
355 two bits that detail the way the data chunk is compressed on disk.
355 two bits that detail the way the data chunk is compressed on disk.
356 (see "COMP_MODE_*" constants for details). For revlog version 0 and
356 (see "COMP_MODE_*" constants for details). For revlog version 0 and
357 1 this will always be COMP_MODE_INLINE.
357 1 this will always be COMP_MODE_INLINE.
358
358
359 [11] side-data compression mode:
359 [11] side-data compression mode:
360 two bits that detail the way the sidedata chunk is compressed on disk.
360 two bits that detail the way the sidedata chunk is compressed on disk.
361 (see "COMP_MODE_*" constants for details)
361 (see "COMP_MODE_*" constants for details)
362 """
362 """
363
363
364 _flagserrorclass = error.RevlogError
364 _flagserrorclass = error.RevlogError
365
365
def __init__(
    self,
    opener,
    target,
    radix,
    postfix=None,  # only exists for `tmpcensored` now
    checkambig=False,
    mmaplargeindex=False,
    censorable=False,
    upperboundcomp=None,
    persistentnodemap=False,
    concurrencychecker=None,
    trypending=False,
):
    """Create a revlog object.

    ``opener`` is a function that abstracts the file opening operation
    and can be used to implement COW semantics or the like.

    ``target`` is a ``(KIND, ID)`` tuple identifying the content stored
    in this revlog. It lets the rest of the code understand what the
    revlog is about without resorting to heuristics or index filename
    analysis. Normal code must set it reliably; test, debug, or
    performance measurement code might not set it to an accurate value.

    The remaining arguments are stored on the instance. The index is
    loaded through ``self._loadindex()`` before the constructor returns.
    """
    # Identity and file-location state. All of it is in place before the
    # nodemap file lookup below receives `self`.
    self.opener = opener
    self.radix = radix
    self.postfix = postfix
    self.upperboundcomp = upperboundcomp
    self._trypending = trypending
    self._docket_file = None
    self._indexfile = None
    self._datafile = None
    self._nodemap_file = None
    if persistentnodemap:
        self._nodemap_file = nodemaputil.get_nodemap_file(self)

    assert target[0] in ALL_KINDS
    assert len(target) == 2
    self.target = target

    # Caller-selected behaviour.
    # checkambig: open the indexfile with checkambig=True at writing, to
    # avoid file stat ambiguity.
    self._checkambig = checkambig
    self._mmaplargeindex = mmaplargeindex
    self._censorable = censorable

    # Caches.
    # (node, rev, text) 3-tuple for a raw revision.
    self._revisioncache = None
    # rev -> chain base rev.
    self._chainbasecache = util.lrucachedict(100)
    # (offset, data) 2-tuple of raw data from the revlog at an offset.
    self._chunkcache = (0, b'')
    # Amount of data to read and cache into the raw revlog data cache.
    self._chunkcachesize = 65536
    # partial identifier -> full node.
    self._pcache = {}

    # Index and docket objects start out unset.
    self.index = None
    self._docket = None
    self._nodemap_docket = None

    # Defaults for delta, compression and sparse-read tuning;
    # `_init_opts` may replace several of them from the opener options.
    self._maxchainlen = None
    self._deltabothparents = True
    self._compengine = b'zlib'
    self._compengineopts = {}
    self._maxdeltachainspan = -1
    self._withsparseread = False
    self._sparserevlog = False
    self.hassidedata = False
    self._srdensitythreshold = 0.50
    self._srmingapsize = 262144

    # Private copy of the flag processors, so that each revlog instance
    # can support custom flags.
    self._flagprocessors = dict(flagutil.flagprocessors)

    # 2-tuple of file handles being used for active writing.
    self._writinghandles = None
    # Guard used to prevent nesting of addgroup.
    self._adding_group = None

    self._loadindex()

    self._concurrencychecker = concurrencychecker
452
452
def _init_opts(self):
    """Apply the opener options to this revlog's default settings.

    The values set here might still be affected when the on-disk
    information is actually read.

    Returns a 3-tuple for use in ``_loadindex()``:

    * the version header to use if a new revlog needs to be created
    * the minimal index size from which mmap is used, or None
    * whether to force the usage of a "development" version of the
      nodemap code

    Raises ``error.RevlogError`` when the chunk cache size is not a
    strictly positive power of 2.
    """
    options = self.opener.options

    # Header used if a new revlog has to be created.
    if b'changelogv2' in options and self.revlog_kind == KIND_CHANGELOG:
        header = CHANGELOGV2
    elif b'revlogv2' in options:
        header = REVLOGV2
    elif b'revlogv1' in options:
        header = REVLOGV1 | FLAG_INLINE_DATA
        if b'generaldelta' in options:
            header |= FLAG_GENERALDELTA
    elif b'revlogv0' in options:
        header = REVLOGV0
    else:
        header = REVLOG_DEFAULT_VERSION

    # Options that, when present, are copied as-is onto the instance.
    direct_settings = (
        (b'chunkcachesize', '_chunkcachesize'),
        (b'maxchainlen', '_maxchainlen'),
        (b'deltabothparents', '_deltabothparents'),
        (b'compengine', '_compengine'),
        (b'maxdeltachainspan', '_maxdeltachainspan'),
        (b'sparse-read-density-threshold', '_srdensitythreshold'),
        (b'sparse-read-min-gap-size', '_srmingapsize'),
    )
    for key, attrname in direct_settings:
        if key in options:
            setattr(self, attrname, options[key])

    # Compression levels are kept in the compression engine options.
    for level_key in (b'zlib.level', b'zstd.level'):
        if level_key in options:
            self._compengineopts[level_key] = options[level_key]

    # lazydeltabase is only looked at when lazydelta itself is enabled.
    lazydelta = bool(options.get(b'lazydelta', True))
    self._lazydelta = lazydelta
    self._lazydeltabase = lazydelta and bool(
        options.get(b'lazydeltabase', False)
    )

    mmap_threshold = None
    if self._mmaplargeindex and b'mmapindexthreshold' in options:
        mmap_threshold = options[b'mmapindexthreshold']

    sparse_revlog = bool(options.get(b'sparse-revlog', False))
    sparse_read = bool(options.get(b'with-sparse-read', False))
    self._sparserevlog = sparse_revlog
    # sparse-revlog forces sparse-read
    self._withsparseread = sparse_revlog or sparse_read

    if options.get(b'enableellipsis'):
        self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

    # Register any flag processors supplied through the options.
    extra_processors = options.get(b'flagprocessors', {})
    for flag, processor in pycompat.iteritems(extra_processors):
        flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

    # The chunk cache size must be a strictly positive power of 2.
    cache_size = self._chunkcachesize
    if cache_size <= 0:
        raise error.RevlogError(
            _(b'revlog chunk cache size %r is not greater than 0')
            % cache_size
        )
    if cache_size & (cache_size - 1):
        raise error.RevlogError(
            _(b'revlog chunk cache size %r is not a power of 2')
            % cache_size
        )

    force_nodemap = options.get(b'devel-force-nodemap', False)
    return header, mmap_threshold, force_nodemap
534
534
def _get_data(self, filepath, mmap_threshold, size=None):
    """Return the content of ``filepath``, memory-mapping it when large.

    When ``mmap_threshold`` is not None and the file is at least that many
    bytes long, the content is returned as a buffer over an mmap of the
    file; otherwise it is read through the file object. ``size``, when
    given, bounds how many bytes are read or mapped.

    A missing file (ENOENT) yields the empty byte string; any other
    IOError is propagated."""
    try:
        with self.opener(filepath) as fp:
            use_mmap = False
            if mmap_threshold is not None:
                file_size = self.opener.fstat(fp).st_size
                use_mmap = file_size >= mmap_threshold
            if use_mmap:
                # TODO: should .close() to release resources without
                # relying on Python GC
                if size is None:
                    return util.buffer(util.mmapread(fp))
                # never map more than the file holds, to avoid a potential
                # mmap crash
                return util.buffer(util.mmapread(fp, min(file_size, size)))
            if size is None:
                return fp.read()
            return fp.read(size)
    except IOError as inst:
        if inst.errno == errno.ENOENT:
            return b''
        raise
561
561
def _loadindex(self):
    """Read the revlog entry point and set up the index-related attributes.

    The steps, in order: pick the entry point file name, read it, decode
    and validate the 4-byte header (format version and flags), locate the
    index data (the entry point itself, or the file named by the docket),
    derive the data file name, select an index parser and parse the data,
    then optionally attach persisted nodemap data to the parsed index.

    Raises error.RevlogError for an unknown version, unknown flags, an
    index file shorter than the docket announces, or an index that fails
    to parse."""

    new_header, mmapindexthreshold, force_nodemap = self._init_opts()

    # Entry point: explicit postfix first, then the pending file (`.i.a`)
    # when requested and present, else the plain `.i` file.
    if self.postfix is not None:
        entry_point = b'%s.i.%s' % (self.radix, self.postfix)
    elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
        entry_point = b'%s.i.a' % self.radix
    else:
        entry_point = b'%s.i' % self.radix

    # NOTE(review): this initial value is overwritten two lines below.
    entry_data = b''
    self._initempty = True
    entry_data = self._get_data(entry_point, mmapindexthreshold)
    if len(entry_data) > 0:
        # existing revlog: the header comes from the first 4 bytes on disk
        header = INDEX_HEADER.unpack(entry_data[:4])[0]
        self._initempty = False
    else:
        # nothing on disk: use the header computed by `_init_opts`
        header = new_header

    # low 16 bits: format version; remaining high bits: format flags
    self._format_flags = header & ~0xFFFF
    self._format_version = header & 0xFFFF

    supported_flags = SUPPORTED_FLAGS.get(self._format_version)
    if supported_flags is None:
        msg = _(b'unknown version (%d) in revlog %s')
        msg %= (self._format_version, self.display_id)
        raise error.RevlogError(msg)
    elif self._format_flags & ~supported_flags:
        msg = _(b'unknown flags (%#04x) in version %d revlog %s')
        display_flag = self._format_flags >> 16
        msg %= (display_flag, self._format_version, self.display_id)
        raise error.RevlogError(msg)

    features = FEATURES_BY_VERSION[self._format_version]
    self._inline = features[b'inline'](self._format_flags)
    self._generaldelta = features[b'generaldelta'](self._format_flags)
    self.hassidedata = features[b'sidedata']

    if not features[b'docket']:
        # no docket: the entry point is the index file itself
        self._indexfile = entry_point
        index_data = entry_data
    else:
        # docket-based format: the entry point is the docket, which names
        # the index file and records how much of it is valid
        self._docket_file = entry_point
        if self._initempty:
            self._docket = docketutil.default_docket(self, header)
        else:
            self._docket = docketutil.parse_docket(
                self, entry_data, use_pending=self._trypending
            )
        self._indexfile = self._docket.index_filepath()
        index_data = b''
        index_size = self._docket.index_end
        if index_size > 0:
            index_data = self._get_data(
                self._indexfile, mmapindexthreshold, size=index_size
            )
            if len(index_data) < index_size:
                msg = _(b'too few index data for %s: got %d, expected %d')
                msg %= (self.display_id, len(index_data), index_size)
                raise error.RevlogError(msg)

        self._inline = False
        # generaldelta implied by version 2 revlogs.
        self._generaldelta = True
        # the logic for persistent nodemap will be dealt with within the
        # main docket, so disable it for now.
        self._nodemap_file = None

    # Data file: named by the docket when there is one, otherwise derived
    # from the radix (and postfix, if any).
    if self._docket is not None:
        self._datafile = self._docket.data_filepath()
    elif self.postfix is None:
        self._datafile = b'%s.d' % self.radix
    else:
        self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

    self.nodeconstants = sha1nodeconstants
    self.nullid = self.nodeconstants.nullid

    # sparse-revlog can't be on without general-delta (issue6056)
    if not self._generaldelta:
        self._sparserevlog = False

    self._storedeltachains = True

    devel_nodemap = (
        self._nodemap_file
        and force_nodemap
        and parse_index_v1_nodemap is not None
    )

    use_rust_index = False
    if rustrevlog is not None:
        if self._nodemap_file is not None:
            use_rust_index = True
        else:
            use_rust_index = self.opener.options.get(b'rust.index')

    # Parser selection: the format version decides first; the nodemap and
    # rust variants are only considered when no version branch matched.
    self._parse_index = parse_index_v1
    if self._format_version == REVLOGV0:
        self._parse_index = revlogv0.parse_index_v0
    elif self._format_version == REVLOGV2:
        self._parse_index = parse_index_v2
    elif self._format_version == CHANGELOGV2:
        self._parse_index = parse_index_cl_v2
    elif devel_nodemap:
        self._parse_index = parse_index_v1_nodemap
    elif use_rust_index:
        self._parse_index = parse_index_v1_mixed
    try:
        d = self._parse_index(index_data, self._inline)
        index, _chunkcache = d
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                docket = nodemap_data[0]
                # only trust the persisted nodemap if its recorded tip
                # still matches the node stored at that rev in the index
                if (
                    len(d[0]) > docket.tip_rev
                    and d[0][docket.tip_rev][7] == docket.tip_node
                ):
                    # no changelog tampering
                    self._nodemap_docket = docket
                    index.update_nodemap_data(*nodemap_data)
    except (ValueError, IndexError):
        raise error.RevlogError(
            _(b"index %s is corrupted") % self.display_id
        )
    self.index, self._chunkcache = d
    if not self._chunkcache:
        self._chunkclear()
    # revnum -> (chain-length, sum-delta-length)
    self._chaininfocache = util.lrucachedict(500)
    # revlog header -> revlog compressor
    self._decompressors = {}
701
701
@util.propertycache
def revlog_kind(self):
    """Kind of this revlog: the first item of ``self.target``."""
    kind = self.target[0]
    return kind
705
705
@util.propertycache
def display_id(self):
    """Identifier of this revlog as shown in user-facing messages."""
    # For now this is simply the radix; a dedicated user-facing rendering
    # of `revlog.target` could replace it.
    public_id = self.radix
    return public_id
712
712
def _get_decompressor(self, t):
    """Return the compressor object registered for revlog header ``t``.

    Compressor objects are built on first use and kept in
    ``self._decompressors``. Raises error.RevlogError when no compression
    engine is known for ``t``."""
    known = self._decompressors
    if t in known:
        return known[t]
    try:
        engine = util.compengines.forrevlogheader(t)
        compressor = engine.revlogcompressor(self._compengineopts)
        known[t] = compressor
    except KeyError:
        raise error.RevlogError(
            _(b'unknown compression type %s') % binascii.hexlify(t)
        )
    return compressor
726
726
@util.propertycache
def _compressor(self):
    """Compressor built from the configured engine and engine options."""
    return util.compengines[self._compengine].revlogcompressor(
        self._compengineopts
    )
731
731
@util.propertycache
def _decompressor(self):
    """the default decompressor

    This is the ``decompress`` callable matching the docket's default
    compression header, or None when the revlog has no docket."""
    docket = self._docket
    if docket is None:
        return None
    header = docket.default_compression_header
    return self._get_decompressor(header).decompress
740
740
def _indexfp(self):
    """Open the revlog's index file for reading and return the file object."""
    index_path = self._indexfile
    return self.opener(index_path, mode=b"r")
744
744
def __index_write_fp(self):
    """Open the index file for writing, positioned where new data goes.

    Without a docket the position is the end of the file; with one it is
    the docket's ``index_end``. A missing index file is created."""
    # You should not use this directly and use `_writing` instead
    try:
        fp = self.opener(
            self._indexfile, mode=b"r+", checkambig=self._checkambig
        )
        if self._docket is not None:
            fp.seek(self._docket.index_end, os.SEEK_SET)
        else:
            fp.seek(0, os.SEEK_END)
        return fp
    except IOError as inst:
        if inst.errno == errno.ENOENT:
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )
        raise
762
762
def __index_new_fp(self):
    """Open a fresh index file for writing, using an atomic temp file."""
    # You should not use this unless you are upgrading from inline revlog
    index_path = self._indexfile
    return self.opener(
        index_path,
        atomictemp=True,
        checkambig=self._checkambig,
        mode=b"w",
    )
771
771
def _datafp(self, mode=b'r'):
    """Open the revlog's data file with ``mode`` and return the file object."""
    data_path = self._datafile
    return self.opener(data_path, mode=mode)
775
775
@contextlib.contextmanager
def _datareadfp(self, existingfp=None):
    """Context manager yielding a file object suitable to read data.

    In order of preference: the explicitly provided ``existingfp``, a
    handle currently open for writing, or a newly opened file that is
    closed when the context exits."""
    # Use explicit file handle, if given.
    if existingfp is not None:
        yield existingfp
        return

    # Use a file handle being actively used for writes, if available.
    # There is some danger to doing this because reads will seek the
    # file. However, _writeentry() performs a SEEK_END before all writes,
    # so we should be safe.
    if self._writinghandles:
        # inline revlogs keep their data in the index file (handle 0)
        yield self._writinghandles[0 if self._inline else 1]
        return

    # Otherwise open a new file handle.
    open_for_read = self._indexfp if self._inline else self._datafp
    with open_for_read() as fp:
        yield fp
801
801
def tiprev(self):
    """Return the number of the last revision in the index."""
    count = len(self.index)
    return count - 1
804
804
def tip(self):
    """Return the node of the last revision in the index."""
    last = self.tiprev()
    return self.node(last)
807
807
def __contains__(self, rev):
    """Tell whether ``rev`` is a revision number held by this revlog."""
    if not 0 <= rev:
        return False
    return rev < len(self)
810
810
def __len__(self):
    """Return the number of entries in the index."""
    index = self.index
    return len(index)
813
813
def __iter__(self):
    """Iterate over every revision number, from 0 to ``len(self) - 1``."""
    count = len(self)
    return iter(pycompat.xrange(count))
816
816
def revs(self, start=0, stop=None):
    """iterate over all rev in this revlog (from start to stop)

    The iteration itself is delegated to ``storageutil.iterrevs``."""
    count = len(self)
    return storageutil.iterrevs(count, start=start, stop=stop)
820
820
@property
def nodemap(self):
    """Deprecated accessor for ``self.index.nodemap``.

    Emits a deprecation warning on every access."""
    util.nouideprecwarn(
        b"revlog.nodemap is deprecated, "
        b"use revlog.index.[has_node|rev|get_rev]",
        b'5.3',
        stacklevel=2,
    )
    return self.index.nodemap
829
829
@property
def _nodecache(self):
    """Deprecated accessor for ``self.index.nodemap``.

    Emits a deprecation warning on every access."""
    util.nouideprecwarn(
        b"revlog._nodecache is deprecated, use revlog.index.nodemap",
        b'5.3',
        stacklevel=2,
    )
    return self.index.nodemap
835
835
def hasnode(self, node):
    """Tell whether ``node`` can be resolved to a revision by ``self.rev``.

    A KeyError from the lookup means "not present"; other exceptions
    propagate."""
    try:
        self.rev(node)
    except KeyError:
        return False
    else:
        return True
842
842
def candelta(self, baserev, rev):
    """whether two revisions (baserev, rev) can be delta-ed or not"""
    # Disable delta if either rev requires a content-changing flag
    # processor (ex. LFS). This is because such flag processor can alter
    # the rawtext content that the delta will be based on, and two clients
    # could have a same revlog node with different flags (i.e. different
    # rawtext contents) and the delta could be incompatible.
    for candidate in (baserev, rev):
        if self.flags(candidate) & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return False
    return True
855
855
def update_caches(self, transaction):
    """Refresh the persistent nodemap, if this revlog uses one.

    Without a transaction the nodemap is updated right away; with one, the
    update is set up through ``nodemaputil.setup_persistent_nodemap``."""
    if self._nodemap_file is None:
        return
    if transaction is None:
        nodemaputil.update_persistent_nodemap(self)
    else:
        nodemaputil.setup_persistent_nodemap(transaction, self)
862
862
def clearcaches(self):
    """Reset the in-memory caches of this revlog and of its index.

    When a persistent nodemap is in use, its data is reloaded and handed
    back to the index afterwards."""
    self._revisioncache = None
    self._chainbasecache.clear()
    self._chunkcache = (0, b'')
    self._pcache = {}
    self._nodemap_docket = None
    self.index.clearcaches()
    # The python code is the one responsible for validating the docket, we
    # end up having to refresh it here.
    if self._inline or self._nodemap_file is None:
        return
    if not util.safehasattr(self.index, 'update_nodemap_data'):
        return
    nodemap_data = nodemaputil.persisted_data(self)
    if nodemap_data is None:
        return
    self._nodemap_docket = nodemap_data[0]
    self.index.update_nodemap_data(*nodemap_data)
882
882
def rev(self, node):
    """Return the revision number of ``node``, as given by the index.

    A failed index lookup raises error.WdirUnsupported for the working
    directory pseudo-nodes and error.LookupError for any other node. A
    TypeError from the index is passed through untouched."""
    try:
        return self.index.rev(node)
    except TypeError:
        raise
    except error.RevlogError:
        # parsers.c radix tree lookup failed
        constants = self.nodeconstants
        is_wdir = (
            node == constants.wdirid or node in constants.wdirfilenodeids
        )
        if is_wdir:
            raise error.WdirUnsupported
        raise error.LookupError(node, self.display_id, _(b'no node'))
896
896
897 # Accessors for index entries.
897 # Accessors for index entries.
898
898
899 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
899 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
900 # are flags.
900 # are flags.
def start(self, rev):
    """Return the offset part of the first index field of ``rev``."""
    offset_flags = self.index[rev][0]
    # the low 16 bits hold the flags; the offset sits above them
    return int(offset_flags >> 16)
903
903
def flags(self, rev):
    """Return the flags of ``rev``: the low 16 bits of its first index field."""
    offset_flags = self.index[rev][0]
    return offset_flags & 0xFFFF
906
906
def length(self, rev):
    """Return the second field of the index entry of ``rev``."""
    entry = self.index[rev]
    return entry[1]
909
909
def sidedata_length(self, rev):
    """Return the sidedata length recorded for ``rev``.

    Always 0 when this revlog has no sidedata support."""
    if self.hassidedata:
        return self.index[rev][9]
    return 0
914
914
def rawsize(self, rev):
    """return the length of the uncompressed text for a given revision"""
    recorded = self.index[rev][2]
    if recorded < 0:
        # no usable size in the index: measure the raw data itself
        return len(self.rawdata(rev))
    return recorded
923
923
def size(self, rev):
    """length of non-raw text (processed by a "read" flag processor)"""
    flags = self.flags(rev)
    # fast path: if no "read" flag processor could change the content,
    # size is rawsize. note: ELLIPSIS is known to not change the content.
    content_changing = flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS
    if flags & content_changing != 0:
        return len(self.revision(rev, raw=False))
    return self.rawsize(rev)
933
933
def chainbase(self, rev):
    """Return the revision at the root of the delta chain of ``rev``.

    Follows the base field of index entries until an entry is its own
    base. The answer is remembered in ``self._chainbasecache``."""
    cache = self._chainbasecache
    cached = cache.get(rev)
    if cached is not None:
        return cached

    index = self.index
    current = rev
    while True:
        base = index[current][3]
        if base == current:
            break
        current = base

    cache[rev] = base
    return base
948
948
def linkrev(self, rev):
    """Return the fifth field of the index entry of ``rev``."""
    entry = self.index[rev]
    return entry[4]
951
951
def parentrevs(self, rev):
    """Return the two parent revisions of ``rev``.

    When the first recorded parent is ``nullrev`` the pair is swapped, so
    a null first parent is reported last. Raises error.WdirUnsupported
    when ``rev`` is the working directory revision."""
    try:
        entry = self.index[rev]
    except IndexError:
        if rev == wdirrev:
            raise error.WdirUnsupported
        raise
    p1, p2 = entry[5], entry[6]
    if p1 == nullrev:
        return p2, p1
    return p1, p2


# fast parentrevs(rev) where rev isn't filtered
_uncheckedparentrevs = parentrevs
966
966
def node(self, rev):
    """Return the node stored in the index entry of ``rev``.

    Raises error.WdirUnsupported when ``rev`` is the working directory
    revision; any other out-of-range lookup raises IndexError."""
    try:
        entry = self.index[rev]
        return entry[7]
    except IndexError:
        if rev == wdirrev:
            raise error.WdirUnsupported
        raise
974
974
975 # Derived from index values.
975 # Derived from index values.
976
976
def end(self, rev):
    """Return ``start(rev) + length(rev)``: where the data of ``rev`` ends."""
    begin = self.start(rev)
    size = self.length(rev)
    return begin + size
979
979
def parents(self, node):
    """Return the nodes of the two parents of ``node``.

    The result follows the same ordering rule as ``parentrevs``: when the
    first recorded parent is the null revision, the pair is swapped so the
    null parent is reported last.

    Any exception raised by ``self.rev(node)`` for an unknown node
    propagates."""
    i = self.index
    d = i[self.rev(node)]
    # inline node() to avoid function call overhead
    # d[5] is a revision number, so it has to be compared with `nullrev`;
    # comparing it with the `nullid` node could never match.
    if d[5] == nullrev:
        return i[d[6]][7], i[d[5]][7]
    else:
        return i[d[5]][7], i[d[6]][7]
988
988
def chainlen(self, rev):
    """Return the first item of ``self._chaininfo(rev)``: the chain length."""
    info = self._chaininfo(rev)
    return info[0]
991
991
def _chaininfo(self, rev):
    """Return ``(chain length, summed entry lengths)`` for ``rev``.

    The chain is walked towards its base: through the base field of each
    entry with generaldelta, through the previous revision otherwise.
    Results are stored in ``self._chaininfocache`` and reused when the
    walk reaches an already known revision."""
    cache = self._chaininfocache
    if rev in cache:
        return cache[rev]
    index = self.index
    follow_base = self._generaldelta
    current = rev
    entry = index[current]
    steps = 0
    total = 0
    while current != entry[3]:
        steps += 1
        total += entry[1]
        current = entry[3] if follow_base else current - 1
        if current in cache:
            known = cache[current]
            steps += known[0]
            total += known[1]
            break
        entry = index[current]
    else:
        # Add text length of base since decompressing that also takes
        # work. For cache hits the length is already included.
        total += entry[1]
    result = (steps, total)
    cache[rev] = result
    return result
1022
1022
def _deltachain(self, rev, stoprev=None):
    """Compute the delta chain leading to ``rev``.

    The walk ends at the base of the chain, or earlier at ``stoprev`` when
    one is given and encountered.

    The result is a ``(chain, stopped)`` pair: ``chain`` lists revisions
    in ascending order and ``stopped`` tells whether ``stoprev`` was hit.
    """
    # Try C implementation.
    try:
        return self.index.deltachain(rev, stoprev, self._generaldelta)
    except AttributeError:
        pass

    # Local aliases keep attribute lookups out of the loop.
    index = self.index
    follow_base = self._generaldelta

    revs = []
    current = rev
    entry = index[current]
    while not (current == entry[3] or current == stoprev):
        revs.append(current)
        current = entry[3] if follow_base else current - 1
        entry = index[current]

    stopped = current == stoprev
    if not stopped:
        # the chain base itself belongs to the chain
        revs.append(current)

    revs.reverse()
    return revs, stopped
1063
1063
def ancestors(self, revs, stoprev=0, inclusive=False):
    """Generate the ancestors of 'revs' in reverse revision order.
    Does not generate revs lower than stoprev.

    ``revs`` is turned into a list and every entry is passed through
    ``self.node()`` before anything else happens. ``stoprev`` and
    ``inclusive`` are forwarded unchanged to the lazy ancestors object.

    See the documentation for ancestor.lazyancestors for more details."""

    # first, make sure start revisions aren't filtered
    revs = list(revs)
    checkrev = self.node
    for r in revs:
        checkrev(r)
    # and we're sure ancestors aren't filtered as well

    if rustancestor is not None and self.index.rust_ext_compat:
        # The Rust implementation walks the index object directly.
        lazyancestors = rustancestor.LazyAncestors
        arg = self.index
    else:
        # The Python implementation needs a "parents of rev" callable.
        lazyancestors = ancestor.lazyancestors
        arg = self._uncheckedparentrevs
    return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1084
1084
def descendants(self, revs):
    """Return the descendants of ``revs`` as computed by
    ``dagop.descendantrevs``.

    The helper receives ``revs`` together with this revlog's ``revs`` and
    ``parentrevs`` callables; its result is returned unchanged."""
    return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1087
1087
def findcommonmissing(self, common=None, heads=None):
    """Return a tuple of the ancestors of common and the ancestors of heads
    that are not ancestors of common. In revset terminology, we return the
    tuple:

      ::common, (::heads) - (::common)

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs.  If heads is
    not supplied, uses all of the revlog's heads.  If common is not
    supplied, uses nullid.

    The first element of the result is a lazy set-like object holding
    revision numbers; the second is a list of nodes."""
    if common is None:
        common = [self.nullid]
    if heads is None:
        heads = self.heads()

    common = [self.rev(n) for n in common]
    heads = [self.rev(n) for n in heads]

    # we want the ancestors, but inclusive
    class lazyset(object):
        """Set-like view over a lazy iterable plus explicitly added
        values."""

        def __init__(self, lazyvalues):
            self.addedvalues = set()
            self.lazyvalues = lazyvalues

        def __contains__(self, value):
            return value in self.addedvalues or value in self.lazyvalues

        def __iter__(self):
            added = self.addedvalues
            for r in added:
                yield r
            for r in self.lazyvalues:
                # do not yield a value twice
                if r not in added:
                    yield r

        def add(self, value):
            self.addedvalues.add(value)

        def update(self, values):
            self.addedvalues.update(values)

    has = lazyset(self.ancestors(common))
    has.add(nullrev)
    has.update(common)

    # take all ancestors from heads that aren't in has
    missing = set()
    visit = collections.deque(r for r in heads if r not in has)
    while visit:
        r = visit.popleft()
        if r in missing:
            # already handled through another path
            continue
        missing.add(r)
        for p in self.parentrevs(r):
            if p not in has:
                visit.append(p)
    return has, [self.node(miss) for miss in sorted(missing)]
1151
1151
def incrementalmissingrevs(self, common=None):
    """Return an object that can be used to incrementally compute the
    revision numbers of the ancestors of arbitrary sets that are not
    ancestors of common. This is an ancestor.incrementalmissingancestors
    object.

    'common' is a list of revision numbers. If common is not supplied, uses
    nullrev.
    """
    if common is None:
        common = [nullrev]

    # Use the Rust implementation when it is available and the index is
    # flagged as compatible with it; it takes the index itself.
    if rustancestor is not None and self.index.rust_ext_compat:
        return rustancestor.MissingAncestors(self.index, common)
    # Otherwise fall back to the Python one, driven by ``parentrevs``.
    return ancestor.incrementalmissingancestors(self.parentrevs, common)
1167
1167
def findmissingrevs(self, common=None, heads=None):
    """Return the revision numbers of the ancestors of heads that
    are not ancestors of common.

    More specifically, return a list of revision numbers corresponding to
    nodes N such that every N satisfies the following constraints:

      1. N is an ancestor of some node in 'heads'
      2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of revision numbers.  If heads is
    not supplied, uses all of the revlog's head revisions.  If common is
    not supplied, uses nullrev."""
    if common is None:
        common = [nullrev]
    if heads is None:
        heads = self.headrevs()

    # The actual computation lives in the incremental helper object.
    inc = self.incrementalmissingrevs(common=common)
    return inc.missingancestors(heads)
1191
1191
def findmissing(self, common=None, heads=None):
    """Return the ancestors of heads that are not ancestors of common.

    More specifically, return a list of nodes N such that every N
    satisfies the following constraints:

      1. N is an ancestor of some node in 'heads'
      2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs.  If heads is
    not supplied, uses all of the revlog's heads.  If common is not
    supplied, uses nullid."""
    if common is None:
        common = [self.nullid]
    if heads is None:
        heads = self.heads()

    # Work on revision numbers internally, convert back on the way out.
    commonrevs = [self.rev(n) for n in common]
    headrevs = [self.rev(n) for n in heads]

    inc = self.incrementalmissingrevs(common=commonrevs)
    return [self.node(r) for r in inc.missingancestors(headrevs)]
1217
1217
def nodesbetween(self, roots=None, heads=None):
    """Return a topological path from 'roots' to 'heads'.

    Return a tuple (nodes, outroots, outheads) where 'nodes' is a
    topologically sorted list of all nodes N that satisfy both of
    these constraints:

      1. N is a descendant of some node in 'roots'
      2. N is an ancestor of some node in 'heads'

    Every node is considered to be both a descendant and an ancestor
    of itself, so every reachable node in 'roots' and 'heads' will be
    included in 'nodes'.

    'outroots' is the list of reachable nodes in 'roots', i.e., the
    subset of 'roots' that is returned in 'nodes'.  Likewise,
    'outheads' is the subset of 'heads' that is also in 'nodes'.

    'roots' and 'heads' are both lists of node IDs.  If 'roots' is
    unspecified, uses nullid as the only root.  If 'heads' is
    unspecified, uses list of all of the revlog's heads.

    An empty 'roots' or 'heads' list, or inputs with nothing in between,
    yield three empty lists."""
    nonodes = ([], [], [])
    if roots is not None:
        roots = list(roots)
        if not roots:
            return nonodes
        lowestrev = min([self.rev(n) for n in roots])
    else:
        roots = [self.nullid]  # Everybody's a descendant of nullid
        lowestrev = nullrev
    if (lowestrev == nullrev) and (heads is None):
        # We want _all_ the nodes!
        return (
            [self.node(r) for r in self],
            [self.nullid],
            list(self.heads()),
        )
    if heads is None:
        # All nodes are ancestors, so the latest ancestor is the last
        # node.
        highestrev = len(self) - 1
        # Set ancestors to None to signal that every node is an ancestor.
        ancestors = None
        # Set heads to an empty dictionary for later discovery of heads
        heads = {}
    else:
        heads = list(heads)
        if not heads:
            return nonodes
        ancestors = set()
        # Turn heads into a dictionary so we can remove 'fake' heads.
        # Also, later we will be using it to filter out the heads we can't
        # find from roots.
        heads = dict.fromkeys(heads, False)
        # Start at the top and keep marking parents until we're done.
        nodestotag = set(heads)
        # Remember where the top was so we can use it as a limit later.
        highestrev = max([self.rev(n) for n in nodestotag])
        while nodestotag:
            # grab a node to tag
            n = nodestotag.pop()
            # Never tag nullid
            if n == self.nullid:
                continue
            # A node's revision number represents its place in a
            # topologically sorted list of nodes.
            r = self.rev(n)
            if r >= lowestrev:
                if n not in ancestors:
                    # If we are possibly a descendant of one of the roots
                    # and we haven't already been marked as an ancestor
                    ancestors.add(n)  # Mark as ancestor
                    # Add non-nullid parents to list of nodes to tag.
                    nodestotag.update(
                        [p for p in self.parents(n) if p != self.nullid]
                    )
                elif n in heads:  # We've seen it before, is it a fake head?
                    # So it is, real heads should not be the ancestors of
                    # any other heads.
                    heads.pop(n)
        if not ancestors:
            return nonodes
        # Now that we have our set of ancestors, we want to remove any
        # roots that are not ancestors.

        # If one of the roots was nullid, everything is included anyway.
        if lowestrev > nullrev:
            # But, since we weren't, let's recompute the lowest rev to not
            # include roots that aren't ancestors.

            # Filter out roots that aren't ancestors of heads
            roots = [root for root in roots if root in ancestors]
            # Recompute the lowest revision
            if roots:
                lowestrev = min([self.rev(root) for root in roots])
            else:
                # No more roots?  Return empty list
                return nonodes
        else:
            # We are descending from nullid, and don't need to care about
            # any other roots.
            lowestrev = nullrev
            roots = [self.nullid]
    # Transform our roots list into a set.
    descendants = set(roots)
    # Also, keep the original roots so we can filter out roots that aren't
    # 'real' roots (i.e. are descended from other roots).
    roots = descendants.copy()
    # Our topologically sorted list of output nodes.
    orderedout = []
    # Don't start at nullid since we don't want nullid in our output list,
    # and if nullid shows up in descendants, empty parents will look like
    # they're descendants.
    for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
        n = self.node(r)
        isdescendant = False
        if lowestrev == nullrev:  # Everybody is a descendant of nullid
            isdescendant = True
        elif n in descendants:
            # n is already a descendant
            isdescendant = True
            # This check only needs to be done here because all the roots
            # will start being marked as descendants before the loop.
            if n in roots:
                # If n was a root, check if it's a 'real' root.
                p = tuple(self.parents(n))
                # If any of its parents are descendants, it's not a root.
                if (p[0] in descendants) or (p[1] in descendants):
                    roots.remove(n)
        else:
            p = tuple(self.parents(n))
            # A node is a descendant if either of its parents are
            # descendants.  (We seeded the descendants set with the roots
            # up there, remember?)
            if (p[0] in descendants) or (p[1] in descendants):
                descendants.add(n)
                isdescendant = True
        if isdescendant and ((ancestors is None) or (n in ancestors)):
            # Only include nodes that are both descendants and ancestors.
            orderedout.append(n)
            if (ancestors is not None) and (n in heads):
                # We're trying to figure out which heads are reachable
                # from roots.
                # Mark this head as having been reached
                heads[n] = True
            elif ancestors is None:
                # Otherwise, we're trying to discover the heads.
                # Assume this is a head because if it isn't, the next step
                # will eventually remove it.
                heads[n] = True
                # But, obviously its parents aren't.
                for p in self.parents(n):
                    heads.pop(p, None)
    # Keep only the heads that were actually flagged as reached/discovered.
    heads = [head for head, flag in pycompat.iteritems(heads) if flag]
    roots = list(roots)
    assert orderedout
    assert roots
    assert heads
    return (orderedout, roots, heads)
1377
1377
def headrevs(self, revs=None):
    """Return head revision numbers.

    Without ``revs`` the index's own ``headrevs()`` is used, falling back
    to ``self._headrevs()`` when the index has no such attribute. With
    ``revs`` the computation is restricted to that collection and is
    delegated to the Rust or Python ``dagop`` helper."""
    if revs is None:
        try:
            return self.index.headrevs()
        except AttributeError:
            # index implementation without a native headrevs
            return self._headrevs()
    if rustdagop is not None and self.index.rust_ext_compat:
        return rustdagop.headrevs(self.index, revs)
    return dagop.headrevs(revs, self._uncheckedparentrevs)
1387
1387
def computephases(self, roots):
    """Forward ``roots`` to the index's ``computephasesmapsets`` and return
    whatever it produces."""
    return self.index.computephasesmapsets(roots)
1390
1390
def _headrevs(self):
    """Pure Python computation of the head revision numbers.

    Returns ``[nullrev]`` for an empty revlog, otherwise the list of
    revisions that are not the parent of any iterated revision."""
    count = len(self)
    if not count:
        return [nullrev]
    # we won't iter over filtered rev so nobody is a head at start
    # One spare slot at the end: presumably so that a parent of -1
    # (nullrev, assumed) indexes that slot instead of a real revision.
    ishead = [0] * (count + 1)
    index = self.index
    for r in self:
        ishead[r] = 1  # I may be a head
        entry = index[r]
        # fields 5 and 6 of an index entry are the two parent revisions
        ishead[entry[5]] = ishead[entry[6]] = 0  # my parents are not
    return [r for r, val in enumerate(ishead) if val]
1403
1403
def heads(self, start=None, stop=None):
    """return the list of all nodes that have no children

    if start is specified, only heads that are descendants of
    start will be returned
    if stop is specified, it will consider all the revs from stop
    as if they had no children
    """
    if start is None and stop is None:
        # Fast path: no restriction at all.
        if not len(self):
            return [self.nullid]
        return [self.node(r) for r in self.headrevs()]

    startrev = nullrev if start is None else self.rev(start)
    # ``stop`` may be None here, in which case there are no stop revs.
    stoprevs = {self.rev(n) for n in stop or []}

    revs = dagop.headrevssubset(
        self.revs, self.parentrevs, startrev=startrev, stoprevs=stoprevs
    )

    return [self.node(rev) for rev in revs]
1429
1429
def children(self, node):
    """find the children of a given node

    Only revisions after the one of ``node`` are examined. A revision
    whose parents are all nullrev counts as a child when ``node`` itself
    resolves to nullrev."""
    found = []
    target = self.rev(node)
    for r in self.revs(start=target + 1):
        realparents = [pr for pr in self.parentrevs(r) if pr != nullrev]
        if realparents:
            # one entry per parent slot that points at the target
            for pr in realparents:
                if pr == target:
                    found.append(self.node(r))
        elif target == nullrev:
            # parentless revision: a child of the null revision
            found.append(self.node(r))
    return found
1443
1443
def commonancestorsheads(self, a, b):
    """calculate all the heads of the common ancestors of nodes a and b

    Both arguments are nodes; the result is a list of nodes."""
    reva, revb = self.rev(a), self.rev(b)
    ancs = self._commonancestorsheads(reva, revb)
    # convert the revision numbers back to nodes
    return pycompat.maplist(self.node, ancs)
1449
1449
def _commonancestorsheads(self, *revs):
    """calculate all the heads of the common ancestors of revs

    Tries the index's implementation first and uses the Python one from
    the ``ancestor`` module when that raises AttributeError or
    OverflowError."""
    try:
        return self.index.commonancestorsheads(*revs)
    except (AttributeError, OverflowError):  # C implementation failed
        return ancestor.commonancestorsheads(self.parentrevs, *revs)
1457
1457
def isancestor(self, a, b):
    """return True if node a is an ancestor of node b

    A revision is considered an ancestor of itself.

    Both nodes are resolved to revision numbers and the question is
    handed to ``isancestorrev``."""
    reva, revb = self.rev(a), self.rev(b)
    return self.isancestorrev(reva, revb)
1464
1464
def isancestorrev(self, a, b):
    """return True if revision a is an ancestor of revision b

    A revision is considered an ancestor of itself.

    The implementation of this is trivial but the use of
    reachableroots is not."""
    # nullrev is an ancestor of everything, and a revision of itself
    if a == nullrev or a == b:
        return True
    # a higher revision number is never treated as an ancestor
    if a > b:
        return False
    return bool(self.reachableroots(a, [b], [a], includepath=False))
1479
1479
def reachableroots(self, minroot, heads, roots, includepath=False):
    """return (heads(::(<roots> and <roots>::<heads>)))

    If includepath is True, return (<roots>::<heads>)."""
    try:
        return self.index.reachableroots2(
            minroot, heads, roots, includepath
        )
    except AttributeError:
        # No native implementation on this index. Note that the pure
        # helper takes ``roots`` before ``heads``.
        return dagop._reachablerootspure(
            self.parentrevs, minroot, roots, heads, includepath
        )
1492
1492
def ancestor(self, a, b):
    """calculate the "best" common ancestor of nodes a and b

    Returns a node; ``self.nullid`` is returned when no common ancestor
    is found."""

    a, b = self.rev(a), self.rev(b)
    try:
        ancs = self.index.ancestors(a, b)
    except (AttributeError, OverflowError):
        # index cannot answer: use the Python implementation
        ancs = ancestor.ancestors(self.parentrevs, a, b)
    if not ancs:
        return self.nullid
    # choose a consistent winner when there's a tie
    return min(map(self.node, ancs))
1505
1505
def _match(self, id):
    """Resolve ``id`` to a node using exact matches only.

    ``id`` may be an integer revision, a binary node, the decimal text of
    a revision number (negative values count from the end), or a full
    hex node. Returns the node, or None when none of these forms match."""
    if isinstance(id, int):
        # rev
        return self.node(id)
    nodelen = self.nodeconstants.nodelen
    if len(id) == nodelen:
        # possibly a binary node
        # odds of a binary node being all hex in ASCII are 1 in 10**25
        try:
            node = id
            self.rev(node)  # quick search the index
            return node
        except error.LookupError:
            pass  # may be partial hex id
    try:
        # str(rev)
        rev = int(id)
        # reject anything that is not the canonical decimal form
        if b"%d" % rev != id:
            raise ValueError
        if rev < 0:
            rev = len(self) + rev
        if rev < 0 or rev >= len(self):
            raise ValueError
        return self.node(rev)
    except (ValueError, OverflowError):
        pass
    if len(id) == 2 * nodelen:
        try:
            # a full hex nodeid?
            node = bin(id)
            self.rev(node)
            return node
        except (TypeError, error.LookupError):
            pass
    # nothing matched exactly
    return None
1539
1539
1540 def _partialmatch(self, id):
1540 def _partialmatch(self, id):
1541 # we don't care wdirfilenodeids as they should be always full hash
1541 # we don't care wdirfilenodeids as they should be always full hash
1542 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1542 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1543 ambiguous = False
1543 ambiguous = False
1544 try:
1544 try:
1545 partial = self.index.partialmatch(id)
1545 partial = self.index.partialmatch(id)
1546 if partial and self.hasnode(partial):
1546 if partial and self.hasnode(partial):
1547 if maybewdir:
1547 if maybewdir:
1548 # single 'ff...' match in radix tree, ambiguous with wdir
1548 # single 'ff...' match in radix tree, ambiguous with wdir
1549 ambiguous = True
1549 ambiguous = True
1550 else:
1550 else:
1551 return partial
1551 return partial
1552 elif maybewdir:
1552 elif maybewdir:
1553 # no 'ff...' match in radix tree, wdir identified
1553 # no 'ff...' match in radix tree, wdir identified
1554 raise error.WdirUnsupported
1554 raise error.WdirUnsupported
1555 else:
1555 else:
1556 return None
1556 return None
1557 except error.RevlogError:
1557 except error.RevlogError:
1558 # parsers.c radix tree lookup gave multiple matches
1558 # parsers.c radix tree lookup gave multiple matches
1559 # fast path: for unfiltered changelog, radix tree is accurate
1559 # fast path: for unfiltered changelog, radix tree is accurate
1560 if not getattr(self, 'filteredrevs', None):
1560 if not getattr(self, 'filteredrevs', None):
1561 ambiguous = True
1561 ambiguous = True
1562 # fall through to slow path that filters hidden revisions
1562 # fall through to slow path that filters hidden revisions
1563 except (AttributeError, ValueError):
1563 except (AttributeError, ValueError):
1564 # we are pure python, or key was too short to search radix tree
1564 # we are pure python, or key was too short to search radix tree
1565 pass
1565 pass
1566 if ambiguous:
1566 if ambiguous:
1567 raise error.AmbiguousPrefixLookupError(
1567 raise error.AmbiguousPrefixLookupError(
1568 id, self.display_id, _(b'ambiguous identifier')
1568 id, self.display_id, _(b'ambiguous identifier')
1569 )
1569 )
1570
1570
1571 if id in self._pcache:
1571 if id in self._pcache:
1572 return self._pcache[id]
1572 return self._pcache[id]
1573
1573
1574 if len(id) <= 40:
1574 if len(id) <= 40:
1575 try:
1575 try:
1576 # hex(node)[:...]
1576 # hex(node)[:...]
1577 l = len(id) // 2 # grab an even number of digits
1577 l = len(id) // 2 # grab an even number of digits
1578 prefix = bin(id[: l * 2])
1578 prefix = bin(id[: l * 2])
1579 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1579 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1580 nl = [
1580 nl = [
1581 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1581 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1582 ]
1582 ]
1583 if self.nodeconstants.nullhex.startswith(id):
1583 if self.nodeconstants.nullhex.startswith(id):
1584 nl.append(self.nullid)
1584 nl.append(self.nullid)
1585 if len(nl) > 0:
1585 if len(nl) > 0:
1586 if len(nl) == 1 and not maybewdir:
1586 if len(nl) == 1 and not maybewdir:
1587 self._pcache[id] = nl[0]
1587 self._pcache[id] = nl[0]
1588 return nl[0]
1588 return nl[0]
1589 raise error.AmbiguousPrefixLookupError(
1589 raise error.AmbiguousPrefixLookupError(
1590 id, self.display_id, _(b'ambiguous identifier')
1590 id, self.display_id, _(b'ambiguous identifier')
1591 )
1591 )
1592 if maybewdir:
1592 if maybewdir:
1593 raise error.WdirUnsupported
1593 raise error.WdirUnsupported
1594 return None
1594 return None
1595 except TypeError:
1595 except TypeError:
1596 pass
1596 pass
1597
1597
def lookup(self, id):
    """Locate a node from one of several identifier forms.

    Accepted forms are:
    - revision number or str(revision number)
    - nodeid or subset of hex nodeid

    Exact forms are tried through ``_match`` first, then prefixes through
    ``_partialmatch``. Raises ``error.LookupError`` when neither finds
    anything.
    """
    node = self._match(id)
    if node is None:
        node = self._partialmatch(id)
        if not node:
            raise error.LookupError(
                id, self.display_id, _(b'no match found')
            )
    return node
1611
1611
def shortest(self, node, minlength=1):
    """Find the shortest unambiguous prefix that matches node.

    ``minlength`` is the smallest prefix length that will be considered.
    When there are no filtered revisions, the index's own ``shortest``
    is tried first; otherwise prefixes of growing length are tested
    through ``_partialmatch``.

    Raises ``error.LookupError`` when the node is unknown.
    """

    def resolves(prefix):
        # True when ``prefix`` designates a single node
        try:
            hit = self._partialmatch(prefix)
        except error.AmbiguousPrefixLookupError:
            return False
        except error.WdirUnsupported:
            # single 'ff...' match
            return True
        if hit is None:
            raise error.LookupError(node, self.display_id, _(b'no node'))
        return True

    def onlyf(prefix):
        # a prefix made only of 'f' could also designate wdir
        return all(c == b'f' for c in pycompat.iterbytestr(prefix))

    hexnode = hex(node)

    def awayfromwdir(hexnode, startlength):
        """Disambiguate against wdirid."""
        for size in range(startlength, len(hexnode) + 1):
            candidate = hexnode[:size]
            if not onlyf(candidate):
                return candidate
        return None

    if not getattr(self, 'filteredrevs', None):
        try:
            fastlength = max(self.index.shortest(node), minlength)
            return awayfromwdir(hexnode, fastlength)
        except error.RevlogError:
            if node != self.nodeconstants.wdirid:
                raise error.LookupError(
                    node, self.display_id, _(b'no node')
                )
        except AttributeError:
            # Fall through to pure code
            pass

    if node == self.nodeconstants.wdirid:
        for size in range(minlength, len(hexnode) + 1):
            candidate = hexnode[:size]
            if resolves(candidate):
                return candidate

    for size in range(minlength, len(hexnode) + 1):
        candidate = hexnode[:size]
        if resolves(candidate):
            return awayfromwdir(hexnode, size)
    return None
1662
1662
def cmp(self, node, text):
    """Tell whether ``text`` differs from the stored revision ``node``.

    The check hashes ``text`` together with the parents of ``node`` and
    compares the result with ``node`` itself.

    Returns True if text is different than what is stored.
    """
    firstparent, secondparent = self.parents(node)
    expected = storageutil.hashrevisionsha1(text, firstparent, secondparent)
    return expected != node
1670
1670
1671 def _cachesegment(self, offset, data):
1671 def _cachesegment(self, offset, data):
1672 """Add a segment to the revlog cache.
1672 """Add a segment to the revlog cache.
1673
1673
1674 Accepts an absolute offset and the data that is at that location.
1674 Accepts an absolute offset and the data that is at that location.
1675 """
1675 """
1676 o, d = self._chunkcache
1676 o, d = self._chunkcache
1677 # try to add to existing cache
1677 # try to add to existing cache
1678 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1678 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1679 self._chunkcache = o, d + data
1679 self._chunkcache = o, d + data
1680 else:
1680 else:
1681 self._chunkcache = offset, data
1681 self._chunkcache = offset, data
1682
1682
def _readsegment(self, offset, length, df=None):
    """Read a segment of raw data from the revlog file.

    ``offset`` is absolute, ``length`` is the number of bytes wanted and
    ``df`` is an optional already-open file handle.

    If an existing file handle is passed, it will be seeked and the
    original seek position will NOT be restored.

    The window actually read is aligned on ``self._chunkcachesize`` and is
    handed to ``_cachesegment`` before the requested part is returned.

    Returns a str or buffer of raw byte data.

    Raises ``error.RevlogError`` if the requested number of bytes could
    not be read.
    """
    # Cache data both forward and backward around the requested
    # data, in a fixed size window. This helps speed up operations
    # involving reading the revlog backwards.
    window = self._chunkcachesize
    alignedoffset = offset & ~(window - 1)
    alignedlength = (
        (offset + length + window) & ~(window - 1)
    ) - alignedoffset
    with self._datareadfp(df) as fp:
        fp.seek(alignedoffset)
        blob = fp.read(alignedlength)

    self._cachesegment(alignedoffset, blob)
    if offset != alignedoffset or alignedlength != length:
        skipped = offset - alignedoffset
        if len(blob) - skipped < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from '
                    b'offset %d, got %d'
                )
                % (
                    self._indexfile if self._inline else self._datafile,
                    length,
                    offset,
                    len(blob) - skipped,
                )
            )

        return util.buffer(blob, skipped, length)

    if len(blob) < length:
        raise error.RevlogError(
            _(
                b'partial read of revlog %s; expected %d bytes from offset '
                b'%d, got %d'
            )
            % (
                self._indexfile if self._inline else self._datafile,
                length,
                offset,
                len(blob),
            )
        )

    return blob
1742
1742
1743 def _getsegment(self, offset, length, df=None):
1743 def _getsegment(self, offset, length, df=None):
1744 """Obtain a segment of raw data from the revlog.
1744 """Obtain a segment of raw data from the revlog.
1745
1745
1746 Accepts an absolute offset, length of bytes to obtain, and an
1746 Accepts an absolute offset, length of bytes to obtain, and an
1747 optional file handle to the already-opened revlog. If the file
1747 optional file handle to the already-opened revlog. If the file
1748 handle is used, it's original seek position will not be preserved.
1748 handle is used, it's original seek position will not be preserved.
1749
1749
1750 Requests for data may be returned from a cache.
1750 Requests for data may be returned from a cache.
1751
1751
1752 Returns a str or a buffer instance of raw byte data.
1752 Returns a str or a buffer instance of raw byte data.
1753 """
1753 """
1754 o, d = self._chunkcache
1754 o, d = self._chunkcache
1755 l = len(d)
1755 l = len(d)
1756
1756
1757 # is it in the cache?
1757 # is it in the cache?
1758 cachestart = offset - o
1758 cachestart = offset - o
1759 cacheend = cachestart + length
1759 cacheend = cachestart + length
1760 if cachestart >= 0 and cacheend <= l:
1760 if cachestart >= 0 and cacheend <= l:
1761 if cachestart == 0 and cacheend == l:
1761 if cachestart == 0 and cacheend == l:
1762 return d # avoid a copy
1762 return d # avoid a copy
1763 return util.buffer(d, cachestart, cacheend - cachestart)
1763 return util.buffer(d, cachestart, cacheend - cachestart)
1764
1764
1765 return self._readsegment(offset, length, df=df)
1765 return self._readsegment(offset, length, df=df)
1766
1766
1767 def _getsegmentforrevs(self, startrev, endrev, df=None):
1767 def _getsegmentforrevs(self, startrev, endrev, df=None):
1768 """Obtain a segment of raw data corresponding to a range of revisions.
1768 """Obtain a segment of raw data corresponding to a range of revisions.
1769
1769
1770 Accepts the start and end revisions and an optional already-open
1770 Accepts the start and end revisions and an optional already-open
1771 file handle to be used for reading. If the file handle is read, its
1771 file handle to be used for reading. If the file handle is read, its
1772 seek position will not be preserved.
1772 seek position will not be preserved.
1773
1773
1774 Requests for data may be satisfied by a cache.
1774 Requests for data may be satisfied by a cache.
1775
1775
1776 Returns a 2-tuple of (offset, data) for the requested range of
1776 Returns a 2-tuple of (offset, data) for the requested range of
1777 revisions. Offset is the integer offset from the beginning of the
1777 revisions. Offset is the integer offset from the beginning of the
1778 revlog and data is a str or buffer of the raw byte data.
1778 revlog and data is a str or buffer of the raw byte data.
1779
1779
1780 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1780 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1781 to determine where each revision's data begins and ends.
1781 to determine where each revision's data begins and ends.
1782 """
1782 """
1783 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1783 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1784 # (functions are expensive).
1784 # (functions are expensive).
1785 index = self.index
1785 index = self.index
1786 istart = index[startrev]
1786 istart = index[startrev]
1787 start = int(istart[0] >> 16)
1787 start = int(istart[0] >> 16)
1788 if startrev == endrev:
1788 if startrev == endrev:
1789 end = start + istart[1]
1789 end = start + istart[1]
1790 else:
1790 else:
1791 iend = index[endrev]
1791 iend = index[endrev]
1792 end = int(iend[0] >> 16) + iend[1]
1792 end = int(iend[0] >> 16) + iend[1]
1793
1793
1794 if self._inline:
1794 if self._inline:
1795 start += (startrev + 1) * self.index.entry_size
1795 start += (startrev + 1) * self.index.entry_size
1796 end += (endrev + 1) * self.index.entry_size
1796 end += (endrev + 1) * self.index.entry_size
1797 length = end - start
1797 length = end - start
1798
1798
1799 return start, self._getsegment(start, length, df=df)
1799 return start, self._getsegment(start, length, df=df)
1800
1800
def _chunk(self, rev, df=None):
    """Obtain a single decompressed chunk for a revision.

    Accepts an integer revision and an optional already-open file handle
    to be used for reading. If used, the seek position of the file will not
    be preserved.

    The compression mode stored in the index entry selects the handling:
    plain data is returned untouched, the default mode goes through
    ``self._decompressor`` and the inline mode through
    ``self.decompress``.

    Returns the uncompressed data for the requested revision.

    Raises ``error.RevlogError`` when the compression mode is unknown.
    """
    compression_mode = self.index[rev][10]
    data = self._getsegmentforrevs(rev, rev, df=df)[1]
    if compression_mode == COMP_MODE_PLAIN:
        return data
    if compression_mode == COMP_MODE_DEFAULT:
        return self._decompressor(data)
    if compression_mode == COMP_MODE_INLINE:
        return self.decompress(data)
    # error messages are bytes everywhere else in this file
    msg = b'unknown compression mode %d'
    msg %= compression_mode
    raise error.RevlogError(msg)
1822
1822
1823 def _chunks(self, revs, df=None, targetsize=None):
1823 def _chunks(self, revs, df=None, targetsize=None):
1824 """Obtain decompressed chunks for the specified revisions.
1824 """Obtain decompressed chunks for the specified revisions.
1825
1825
1826 Accepts an iterable of numeric revisions that are assumed to be in
1826 Accepts an iterable of numeric revisions that are assumed to be in
1827 ascending order. Also accepts an optional already-open file handle
1827 ascending order. Also accepts an optional already-open file handle
1828 to be used for reading. If used, the seek position of the file will
1828 to be used for reading. If used, the seek position of the file will
1829 not be preserved.
1829 not be preserved.
1830
1830
1831 This function is similar to calling ``self._chunk()`` multiple times,
1831 This function is similar to calling ``self._chunk()`` multiple times,
1832 but is faster.
1832 but is faster.
1833
1833
1834 Returns a list with decompressed data for each requested revision.
1834 Returns a list with decompressed data for each requested revision.
1835 """
1835 """
1836 if not revs:
1836 if not revs:
1837 return []
1837 return []
1838 start = self.start
1838 start = self.start
1839 length = self.length
1839 length = self.length
1840 inline = self._inline
1840 inline = self._inline
1841 iosize = self.index.entry_size
1841 iosize = self.index.entry_size
1842 buffer = util.buffer
1842 buffer = util.buffer
1843
1843
1844 l = []
1844 l = []
1845 ladd = l.append
1845 ladd = l.append
1846
1846
1847 if not self._withsparseread:
1847 if not self._withsparseread:
1848 slicedchunks = (revs,)
1848 slicedchunks = (revs,)
1849 else:
1849 else:
1850 slicedchunks = deltautil.slicechunk(
1850 slicedchunks = deltautil.slicechunk(
1851 self, revs, targetsize=targetsize
1851 self, revs, targetsize=targetsize
1852 )
1852 )
1853
1853
1854 for revschunk in slicedchunks:
1854 for revschunk in slicedchunks:
1855 firstrev = revschunk[0]
1855 firstrev = revschunk[0]
1856 # Skip trailing revisions with empty diff
1856 # Skip trailing revisions with empty diff
1857 for lastrev in revschunk[::-1]:
1857 for lastrev in revschunk[::-1]:
1858 if length(lastrev) != 0:
1858 if length(lastrev) != 0:
1859 break
1859 break
1860
1860
1861 try:
1861 try:
1862 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1862 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1863 except OverflowError:
1863 except OverflowError:
1864 # issue4215 - we can't cache a run of chunks greater than
1864 # issue4215 - we can't cache a run of chunks greater than
1865 # 2G on Windows
1865 # 2G on Windows
1866 return [self._chunk(rev, df=df) for rev in revschunk]
1866 return [self._chunk(rev, df=df) for rev in revschunk]
1867
1867
1868 decomp = self.decompress
1868 decomp = self.decompress
1869 # self._decompressor might be None, but will not be used in that case
1869 # self._decompressor might be None, but will not be used in that case
1870 def_decomp = self._decompressor
1870 def_decomp = self._decompressor
1871 for rev in revschunk:
1871 for rev in revschunk:
1872 chunkstart = start(rev)
1872 chunkstart = start(rev)
1873 if inline:
1873 if inline:
1874 chunkstart += (rev + 1) * iosize
1874 chunkstart += (rev + 1) * iosize
1875 chunklength = length(rev)
1875 chunklength = length(rev)
1876 comp_mode = self.index[rev][10]
1876 comp_mode = self.index[rev][10]
1877 c = buffer(data, chunkstart - offset, chunklength)
1877 c = buffer(data, chunkstart - offset, chunklength)
1878 if comp_mode == COMP_MODE_PLAIN:
1878 if comp_mode == COMP_MODE_PLAIN:
1879 ladd(c)
1879 ladd(c)
1880 elif comp_mode == COMP_MODE_INLINE:
1880 elif comp_mode == COMP_MODE_INLINE:
1881 ladd(decomp(c))
1881 ladd(decomp(c))
1882 elif comp_mode == COMP_MODE_DEFAULT:
1882 elif comp_mode == COMP_MODE_DEFAULT:
1883 ladd(def_decomp(c))
1883 ladd(def_decomp(c))
1884 else:
1884 else:
1885 msg = 'unknown compression mode %d'
1885 msg = 'unknown compression mode %d'
1886 msg %= comp_mode
1886 msg %= comp_mode
1887 raise error.RevlogError(msg)
1887 raise error.RevlogError(msg)
1888
1888
1889 return l
1889 return l
1890
1890
1891 def _chunkclear(self):
1891 def _chunkclear(self):
1892 """Clear the raw chunk cache."""
1892 """Clear the raw chunk cache."""
1893 self._chunkcache = (0, b'')
1893 self._chunkcache = (0, b'')
1894
1894
def deltaparent(self, rev):
    """Return the revision that ``rev`` is stored as a delta against.

    Returns ``nullrev`` when the entry is its own base. Otherwise the
    result is the recorded base when general delta is in use, and the
    previous revision when it is not.
    """
    deltabase = self.index[rev][3]
    if deltabase == rev:
        return nullrev
    if self._generaldelta:
        return deltabase
    return rev - 1
1904
1904
def issnapshot(self, rev):
    """Tell whether ``rev`` is a snapshot.

    Without sparse-revlog, a revision is a snapshot when it has no delta
    parent. With sparse-revlog, the index's own ``issnapshot`` is used
    when available; otherwise the delta base is followed until the answer
    is known.
    """
    if not self._sparserevlog:
        return self.deltaparent(rev) == nullrev
    if util.safehasattr(self.index, b'issnapshot'):
        # directly assign the method to cache the testing and access
        self.issnapshot = self.index.issnapshot
        return self.issnapshot(rev)
    if rev == nullrev:
        return True
    entry = self.index[rev]
    deltabase = entry[3]
    if deltabase == rev or deltabase == nullrev:
        # stored in full, or as a delta against the null revision
        return True
    parents = (entry[5], entry[6])
    if deltabase in parents:
        # a delta against a parent is a regular delta
        return False
    return self.issnapshot(deltabase)
1926
1926
def snapshotdepth(self, rev):
    """Return the number of snapshots in the chain before ``rev``.

    Raises ``error.ProgrammingError`` when ``rev`` is not a snapshot.
    """
    if not self.issnapshot(rev):
        # interpolate the revision so the message does not show a raw %d
        raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
    return len(self._deltachain(rev)[0]) - 1
1932
1932
def revdiff(self, rev1, rev2):
    """Return a delta turning revision ``rev1`` into revision ``rev2``.

    The delta is in binary form and is intended to be written to revlog
    data directly, so raw revision data is used. When ``rev2`` is already
    stored as a delta against ``rev1``, the stored chunk is returned;
    otherwise the delta is computed from both raw texts.
    """
    alreadystored = rev1 != nullrev and self.deltaparent(rev2) == rev1
    if alreadystored:
        return bytes(self._chunk(rev2))

    oldtext = self.rawdata(rev1)
    newtext = self.rawdata(rev2)
    return mdiff.textdiff(oldtext, newtext)
1943
1943
def _processflags(self, text, flags, operation, raw=False):
    """Deprecated entry point to the flag processors.

    Emits a deprecation warning, then dispatches to the raw, read or
    write variant from ``flagutil`` according to ``raw`` and
    ``operation``.
    """
    msg = b'_processflag(...) use the specialized variant'
    util.nouideprecwarn(msg, b'5.2', stacklevel=2)
    if raw:
        return text, flagutil.processflagsraw(self, text, flags)
    if operation == b'read':
        return flagutil.processflagsread(self, text, flags)
    # write operation
    return flagutil.processflagswrite(self, text, flags)
1954
1954
def revision(self, nodeorrev, _df=None, raw=False):
    """Return the uncompressed revision for a node or revision number.

    _df - an existing file handle to read from. (internal-only)
    raw - an optional argument specifying if the revision data is to be
    treated as raw data when applying flag transforms. 'raw' should be set
    to True when generating changegroups or in debug commands. Passing it
    emits a deprecation warning.
    """
    if raw:
        msg = (
            b'revlog.revision(..., raw=True) is deprecated, '
            b'use revlog.rawdata(...)'
        )
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
    result = self._revisiondata(nodeorrev, _df, raw=raw)
    # only the text is wanted here, not the sidedata
    return result[0]
1971
1971
def sidedata(self, nodeorrev, _df=None):
    """Return the extra data attached to a revision but not hashed with it.

    ``nodeorrev`` is either a revision number or a node; ``_df`` is not
    used by this method.

    This function currently returns whatever ``self._sidedata`` provides.
    However, a more advanced mapping object will likely be used in the
    future for more efficient/lazy code.
    """
    # deal with <nodeorrev> argument type
    rev = nodeorrev if isinstance(nodeorrev, int) else self.rev(nodeorrev)
    return self._sidedata(rev)
1985
1985
1986 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1986 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1987 # deal with <nodeorrev> argument type
1987 # deal with <nodeorrev> argument type
1988 if isinstance(nodeorrev, int):
1988 if isinstance(nodeorrev, int):
1989 rev = nodeorrev
1989 rev = nodeorrev
1990 node = self.node(rev)
1990 node = self.node(rev)
1991 else:
1991 else:
1992 node = nodeorrev
1992 node = nodeorrev
1993 rev = None
1993 rev = None
1994
1994
1995 # fast path the special `nullid` rev
1995 # fast path the special `nullid` rev
1996 if node == self.nullid:
1996 if node == self.nullid:
1997 return b"", {}
1997 return b"", {}
1998
1998
1999 # ``rawtext`` is the text as stored inside the revlog. Might be the
1999 # ``rawtext`` is the text as stored inside the revlog. Might be the
2000 # revision or might need to be processed to retrieve the revision.
2000 # revision or might need to be processed to retrieve the revision.
2001 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
2001 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
2002
2002
2003 if self.hassidedata:
2003 if self.hassidedata:
2004 if rev is None:
2004 if rev is None:
2005 rev = self.rev(node)
2005 rev = self.rev(node)
2006 sidedata = self._sidedata(rev)
2006 sidedata = self._sidedata(rev)
2007 else:
2007 else:
2008 sidedata = {}
2008 sidedata = {}
2009
2009
2010 if raw and validated:
2010 if raw and validated:
2011 # if we don't want to process the raw text and that raw
2011 # if we don't want to process the raw text and that raw
2012 # text is cached, we can exit early.
2012 # text is cached, we can exit early.
2013 return rawtext, sidedata
2013 return rawtext, sidedata
2014 if rev is None:
2014 if rev is None:
2015 rev = self.rev(node)
2015 rev = self.rev(node)
2016 # the revlog's flag for this revision
2016 # the revlog's flag for this revision
2017 # (usually alter its state or content)
2017 # (usually alter its state or content)
2018 flags = self.flags(rev)
2018 flags = self.flags(rev)
2019
2019
2020 if validated and flags == REVIDX_DEFAULT_FLAGS:
2020 if validated and flags == REVIDX_DEFAULT_FLAGS:
2021 # no extra flags set, no flag processor runs, text = rawtext
2021 # no extra flags set, no flag processor runs, text = rawtext
2022 return rawtext, sidedata
2022 return rawtext, sidedata
2023
2023
2024 if raw:
2024 if raw:
2025 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2025 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2026 text = rawtext
2026 text = rawtext
2027 else:
2027 else:
2028 r = flagutil.processflagsread(self, rawtext, flags)
2028 r = flagutil.processflagsread(self, rawtext, flags)
2029 text, validatehash = r
2029 text, validatehash = r
2030 if validatehash:
2030 if validatehash:
2031 self.checkhash(text, node, rev=rev)
2031 self.checkhash(text, node, rev=rev)
2032 if not validated:
2032 if not validated:
2033 self._revisioncache = (node, rev, rawtext)
2033 self._revisioncache = (node, rev, rawtext)
2034
2034
2035 return text, sidedata
2035 return text, sidedata
2036
2036
def _rawtext(self, node, rev, _df=None):
    """Return the raw text of a revision, possibly without validation.

    ``node`` identifies the revision. ``rev`` is its revision number, or
    None, in which case it is resolved with ``self.rev(node)`` once the
    text actually has to be rebuilt. ``_df`` is forwarded to
    ``self._chunks``.

    Returns a ``(rev, rawtext, validated)`` tuple. ``validated`` is True
    only when the text was served straight from ``self._revisioncache``;
    in that case ``rev`` is returned exactly as it was passed in (it may
    still be None) and the cache is left untouched.
    """
    # The cache entry looks like (node, rev, rawtext)
    cached = self._revisioncache
    # revision in the cache (could be useful to apply delta)
    stoprev = None
    if cached:
        if cached[0] == node:
            return (rev, cached[2], True)
        stoprev = cached[1]

    if rev is None:
        rev = self.rev(node)

    chain, stopped = self._deltachain(rev, stoprev=stoprev)
    # When the chain stopped at the cached revision, the cached text is the
    # intermediate text the remaining deltas are applied to.
    basetext = self._revisioncache[2] if stopped else None

    # drop cache to save memory, the caller is expected to
    # update self._revisioncache after validating the text
    self._revisioncache = None

    # field 2 of the index entry is read as the raw size; a negative value
    # means no size hint is handed to ``_chunks``
    rawsize = self.index[rev][2]
    targetsize = 4 * rawsize if rawsize >= 0 else None

    bins = self._chunks(chain, df=_df, targetsize=targetsize)
    if basetext is None:
        # no cached base: the first chunk serves as the base text
        basetext = bytes(bins[0])
        bins = bins[1:]

    rawtext = mdiff.patches(basetext, bins)
    del basetext  # let us have a chance to free memory early
    return (rev, rawtext, False)
2079
2079
def _sidedata(self, rev):
    """Return the sidedata for a given revision number.

    The index entry of ``rev`` supplies the sidedata offset (field 8), its
    stored size (field 9) and its compression mode (field 11). An empty
    dict is returned when the stored size is zero; otherwise the segment
    is read, decompressed according to the compression mode and handed to
    ``sidedatautil.deserialize_sidedata``.

    Raises ``error.RevlogError`` when the compression mode is not one of
    COMP_MODE_PLAIN, COMP_MODE_DEFAULT or COMP_MODE_INLINE.
    """
    index_entry = self.index[rev]
    sidedata_size = index_entry[9]
    if sidedata_size == 0:
        # nothing stored: no need to compute an offset or read anything
        return {}

    sidedata_offset = index_entry[8]
    if self._inline:
        # NOTE(review): presumably accounts for the index entries that are
        # interleaved with the data in an inline revlog -- confirm
        sidedata_offset += self.index.entry_size * (1 + rev)

    comp_segment = self._getsegment(sidedata_offset, sidedata_size)
    comp = index_entry[11]
    if comp == COMP_MODE_PLAIN:
        segment = comp_segment
    elif comp == COMP_MODE_DEFAULT:
        segment = self._decompressor(comp_segment)
    elif comp == COMP_MODE_INLINE:
        segment = self.decompress(comp_segment)
    else:
        # bytes, like every other message handed to error.RevlogError
        msg = b'unknown compression mode %d'
        msg %= comp
        raise error.RevlogError(msg)

    return sidedatautil.deserialize_sidedata(segment)
2106
2106
def rawdata(self, nodeorrev, _df=None):
    """Return the uncompressed raw data of a given node or revision number.

    This is the first element of what ``self._revisiondata`` returns when
    called with ``raw=True``.

    _df - an existing file handle to read from. (internal-only)
    """
    revisiondata = self._revisiondata(nodeorrev, _df, raw=True)
    return revisiondata[0]
2113
2113
def hash(self, text, p1, p2):
    """Return the node hash of ``text`` with parents ``p1`` and ``p2``.

    The computation is delegated to ``storageutil.hashrevisionsha1``.

    Available as a method so that subclasses can replace the hash
    as needed.
    """
    node = storageutil.hashrevisionsha1(text, p1, p2)
    return node
2121
2121
def checkhash(self, text, node, p1=None, p2=None, rev=None):
    """Check that ``node`` is the hash of ``text`` and its parents.

    When neither ``p1`` nor ``p2`` is given, both are looked up with
    ``self.parents(node)``. ``rev`` is only used to label the error
    message; a shortened hex form of ``node`` is used when it is None.

    Raises ``error.RevlogError`` on mismatch, or ``error.CensoredNodeError``
    instead when this revlog is censorable and ``text`` is a censored text.

    Available as a function so that subclasses can extend hash mismatch
    behaviors as needed.
    """
    try:
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node == self.hash(text, p1, p2):
            return

        # Clear the revision cache on hash failure. The revision cache
        # only stores the raw revision and clearing the cache does have
        # the side-effect that we won't have a cache hit when the raw
        # revision data is accessed. But this case should be rare and
        # it is extra work to teach the cache about the hash
        # verification state.
        cache = self._revisioncache
        if cache and cache[0] == node:
            self._revisioncache = None

        if rev is None:
            revornode = templatefilters.short(hex(node))
        else:
            revornode = rev
        raise error.RevlogError(
            _(b"integrity check failed on %s:%s")
            % (self.display_id, pycompat.bytestr(revornode))
        )
    except error.RevlogError:
        # a failure on a censored text is reported as such
        if self._censorable and storageutil.iscensoredtext(text):
            raise error.CensoredNodeError(self.display_id, node, text)
        raise
2152
2152
def _enforceinlinesize(self, tr):
    """Check if the revlog is too big for inline and convert if so.

    This should be called after revisions are added to the revlog. If the
    revlog has grown too large to be an inline revlog, it will convert it
    to use multiple index and data files.

    ``tr`` is the running transaction. The index file must already be
    registered in it, otherwise ``error.RevlogError`` is raised. The
    offsets recorded in the transaction for the data and index files are
    replaced so that they point at the first revision stored at or after
    the offset previously recorded for the inline index file.
    """
    tiprev = len(self) - 1
    total_size = self.start(tiprev) + self.length(tiprev)
    if not self._inline or total_size < _maxinline:
        return

    troffset = tr.findoffset(self._indexfile)
    if troffset is None:
        raise error.RevlogError(
            _(b"%s not found in the transaction") % self._indexfile
        )
    # First revision stored at or after ``troffset``. It must be the
    # *first* such revision: positions only grow with the revision number,
    # so overwriting it on every match would always end up with the last
    # revision and leave the recorded offsets too far forward.
    trindex = None
    tr.add(self._datafile, 0)

    existing_handles = False
    if self._writinghandles is not None:
        existing_handles = True
        fp = self._writinghandles[0]
        fp.flush()
        fp.close()
        # We can't use the cached file handle after close(). So prevent
        # its usage.
        self._writinghandles = None

    new_dfh = self._datafp(b'w+')
    new_dfh.truncate(0)  # drop any potentially existing data
    try:
        with self._indexfp() as read_ifh:
            for r in self:
                new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                if (
                    trindex is None
                    and troffset <= self.start(r) + r * self.index.entry_size
                ):
                    trindex = r
            new_dfh.flush()

        if trindex is None:
            # no revision at or after the recorded offset; keep the
            # historical fallback
            trindex = 0

        with self.__index_new_fp() as fp:
            self._format_flags &= ~FLAG_INLINE_DATA
            self._inline = False
            for i in self:
                e = self.index.entry_binary(i)
                if i == 0 and self._docket is None:
                    # without a docket, the header is prepended to the
                    # first index entry
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    e = header + e
                fp.write(e)
            if self._docket is not None:
                self._docket.index_end = fp.tell()

            # There is a small transactional race here. If the rename of
            # the index fails, we should remove the datafile. It is more
            # important to ensure that the data file is not truncated
            # when the index is replaced as otherwise data is lost.
            tr.replace(self._datafile, self.start(trindex))

            # the temp file replace the real index when we exit the context
            # manager

        tr.replace(self._indexfile, trindex * self.index.entry_size)
        nodemaputil.setup_persistent_nodemap(tr, self)
        self._chunkclear()

        if existing_handles:
            # switched from inline to conventional reopen the index
            ifh = self.__index_write_fp()
            self._writinghandles = (ifh, new_dfh)
            # ownership moved to self._writinghandles: do not close below
            new_dfh = None
    finally:
        if new_dfh is not None:
            new_dfh.close()
2227
2227
def _nodeduplicatecallback(self, transaction, node):
    """Hook called when trying to add a node that is already stored.

    This implementation does nothing.
    """
2230
2230
@contextlib.contextmanager
def _writing(self, transaction):
    """Context manager giving write access to the revlog files.

    Raises ``error.ProgrammingError`` when the revlog is in `trypending`
    mode. When write handles are already set (``self._writinghandles`` is
    not None) the context simply yields. Otherwise the data file (for
    non-inline revlogs only) and the index file are opened and registered
    in ``transaction``, exposed as ``self._writinghandles`` for the
    duration of the context, and closed on exit. On a normal exit the
    docket, if any, is written through ``self._write_docket``.
    """
    if self._trypending:
        raise error.ProgrammingError(
            b'try to write in a `trypending` revlog: %s' % self.display_id
        )

    if self._writinghandles is not None:
        # handles are already available: nothing to open or close here
        yield
        return

    ifh = dfh = None
    try:
        r = len(self)
        # opening the data file.
        dsize = self.end(r - 1) if r else 0
        if not self._inline:
            try:
                dfh = self._datafp(b"r+")
                if self._docket is None:
                    dfh.seek(0, os.SEEK_END)
                else:
                    dfh.seek(self._docket.data_end, os.SEEK_SET)
            except IOError as inst:
                if inst.errno != errno.ENOENT:
                    raise
                # the data file does not exist yet: create it
                dfh = self._datafp(b"w+")
            transaction.add(self._datafile, dsize)

        # opening the index file.
        isize = r * self.index.entry_size
        ifh = self.__index_write_fp()
        # for an inline revlog the index file also holds the data
        index_offset = dsize + isize if self._inline else isize
        transaction.add(self._indexfile, index_offset)
        # exposing all file handle for writing.
        self._writinghandles = (ifh, dfh)
        yield
        if self._docket is not None:
            self._write_docket(transaction)
    finally:
        self._writinghandles = None
        if dfh is not None:
            dfh.close()
        # closing the index file last to avoid exposing referent to
        # potential unflushed data content.
        if ifh is not None:
            ifh.close()
2281
2281
def _write_docket(self, transaction):
    """Write the current docket to disk through ``transaction``.

    Exists as a method to help changelog implement its transaction logic.

    We could also imagine using the same transaction logic for all revlogs
    since dockets are cheap.
    """
    docket = self._docket
    docket.write(transaction)
2290
2290
def addrevision(
    self,
    text,
    transaction,
    link,
    p1,
    p2,
    cachedelta=None,
    node=None,
    flags=REVIDX_DEFAULT_FLAGS,
    deltacomputer=None,
    sidedata=None,
):
    """add a revision to the log

    text - the revision data to add
    transaction - the transaction object used for rollback
    link - the linkrev data to add
    p1, p2 - the parent nodeids of the revision
    cachedelta - an optional precomputed delta
    node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use different hashing method (and override checkhash() in such case)
    flags - the known flags to set on the revision
    deltacomputer - an optional deltacomputer instance shared between
        multiple calls
    sidedata - an optional sidedata mapping for the revision; None is
        treated as empty, and a non-empty value is rejected when the revlog
        has no sidedata support

    Returns the revision number, which is the one of the already stored
    revision when ``node`` is already present in the index.
    """
    if link == nullrev:
        raise error.RevlogError(
            _(b"attempted to add linkrev -1 to %s") % self.display_id
        )

    if sidedata is None:
        sidedata = {}
    elif sidedata and not self.hassidedata:
        raise error.ProgrammingError(
            _(b"trying to add sidedata to a revlog who don't support them")
        )

    # with flags set, the node is computed from the text as given, before
    # the flag processors run
    if flags and not node:
        node = self.hash(text, p1, p2)

    rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

    # If the flag processor modifies the revision data, ignore any provided
    # cachedelta.
    if rawtext != text:
        cachedelta = None

    rawsize = len(rawtext)
    if rawsize > _maxentrysize:
        raise error.RevlogError(
            _(
                b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
            )
            % (self.display_id, rawsize)
        )

    if not node:
        node = self.hash(rawtext, p1, p2)

    # already stored: report the existing revision instead of adding again
    existing = self.index.get_rev(node)
    if existing is not None:
        return existing

    if validatehash:
        self.checkhash(rawtext, node, p1=p1, p2=p2)

    return self.addrawrevision(
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=cachedelta,
        deltacomputer=deltacomputer,
        sidedata=sidedata,
    )
2368
2368
def addrawrevision(
    self,
    rawtext,
    transaction,
    link,
    p1,
    p2,
    node,
    flags,
    cachedelta=None,
    deltacomputer=None,
    sidedata=None,
):
    """Add a raw revision with known flags, node and parents.

    Useful when reusing a revision not stored in this revlog (ex: received
    over wire, or read from an external bundle).

    The work is done by ``self._addrevision`` inside a
    ``self._writing(transaction)`` context, and its result is returned.
    """
    with self._writing(transaction):
        rev = self._addrevision(
            node,
            rawtext,
            transaction,
            link,
            p1,
            p2,
            flags,
            cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )
        return rev
2399
2399
def compress(self, data):
    """Generate a possibly-compressed representation of data.

    Returns a ``(header, payload)`` pair. Empty ``data`` is returned as is
    with an empty header. When ``self._compressor.compress`` returns
    something truthy, that result is the payload, again with an empty
    header. Otherwise ``data`` is returned unchanged, with a ``b'u'``
    header unless it already starts with a NUL byte.
    """
    if not data:
        return b'', data

    compressed = self._compressor.compress(data)
    if compressed:
        # The revlog compressor added the header in the returned data.
        return b'', compressed

    # stored as is
    header = b'' if data[0:1] == b'\0' else b'u'
    return header, data
2414
2414
def decompress(self, data):
    """Decompress a revlog chunk.

    The chunk is expected to begin with a header identifying the
    format type so it can be routed to an appropriate decompressor.

    Empty chunks and chunks starting with a NUL byte are returned as is.
    A ``b'u'`` header is stripped, ``b'x'`` chunks go through zlib, and any
    other header is resolved with ``self._get_decompressor``.

    Raises ``error.RevlogError`` when zlib fails on a ``b'x'`` chunk.
    """
    if not data:
        return data

    # Revlogs are read much more frequently than they are written and many
    # chunks only take microseconds to decompress, so performance is
    # important here.
    #
    # We can make a few assumptions about revlogs:
    #
    # 1) the majority of chunks will be compressed (as opposed to inline
    #    raw data).
    # 2) decompressing *any* data will likely be at least 10x slower than
    #    returning raw inline data.
    # 3) we want to prioritize common and officially supported compression
    #    engines
    #
    # It follows that we want to optimize for "decompress compressed data
    # when encoded with common and officially supported compression engines"
    # case over "raw data" and "data encoded by less common or non-official
    # compression engines." That is why we have the inline lookup first
    # followed by the compengines lookup.
    #
    # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
    # compressed chunks. And this matters for changelog and manifest reads.
    header = data[0:1]

    if header == b'x':
        try:
            return _zlibdecompress(data)
        except zlib.error as e:
            raise error.RevlogError(
                _(b'revlog decompress error: %s')
                % stringutil.forcebytestr(e)
            )
    # '\0' is more common than 'u' so it goes first.
    if header == b'\0':
        return data
    if header == b'u':
        return util.buffer(data, 1)

    decompressor = self._get_decompressor(header)
    return decompressor.decompress(data)
2464
2464
2465 def _addrevision(
2465 def _addrevision(
2466 self,
2466 self,
2467 node,
2467 node,
2468 rawtext,
2468 rawtext,
2469 transaction,
2469 transaction,
2470 link,
2470 link,
2471 p1,
2471 p1,
2472 p2,
2472 p2,
2473 flags,
2473 flags,
2474 cachedelta,
2474 cachedelta,
2475 alwayscache=False,
2475 alwayscache=False,
2476 deltacomputer=None,
2476 deltacomputer=None,
2477 sidedata=None,
2477 sidedata=None,
2478 ):
2478 ):
2479 """internal function to add revisions to the log
2479 """internal function to add revisions to the log
2480
2480
2481 see addrevision for argument descriptions.
2481 see addrevision for argument descriptions.
2482
2482
2483 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2483 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2484
2484
2485 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2485 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2486 be used.
2486 be used.
2487
2487
2488 invariants:
2488 invariants:
2489 - rawtext is optional (can be None); if not set, cachedelta must be set.
2489 - rawtext is optional (can be None); if not set, cachedelta must be set.
2490 if both are set, they must correspond to each other.
2490 if both are set, they must correspond to each other.
2491 """
2491 """
2492 if node == self.nullid:
2492 if node == self.nullid:
2493 raise error.RevlogError(
2493 raise error.RevlogError(
2494 _(b"%s: attempt to add null revision") % self.display_id
2494 _(b"%s: attempt to add null revision") % self.display_id
2495 )
2495 )
2496 if (
2496 if (
2497 node == self.nodeconstants.wdirid
2497 node == self.nodeconstants.wdirid
2498 or node in self.nodeconstants.wdirfilenodeids
2498 or node in self.nodeconstants.wdirfilenodeids
2499 ):
2499 ):
2500 raise error.RevlogError(
2500 raise error.RevlogError(
2501 _(b"%s: attempt to add wdir revision") % self.display_id
2501 _(b"%s: attempt to add wdir revision") % self.display_id
2502 )
2502 )
2503 if self._writinghandles is None:
2503 if self._writinghandles is None:
2504 msg = b'adding revision outside `revlog._writing` context'
2504 msg = b'adding revision outside `revlog._writing` context'
2505 raise error.ProgrammingError(msg)
2505 raise error.ProgrammingError(msg)
2506
2506
2507 if self._inline:
2507 if self._inline:
2508 fh = self._writinghandles[0]
2508 fh = self._writinghandles[0]
2509 else:
2509 else:
2510 fh = self._writinghandles[1]
2510 fh = self._writinghandles[1]
2511
2511
2512 btext = [rawtext]
2512 btext = [rawtext]
2513
2513
2514 curr = len(self)
2514 curr = len(self)
2515 prev = curr - 1
2515 prev = curr - 1
2516
2516
2517 offset = self._get_data_offset(prev)
2517 offset = self._get_data_offset(prev)
2518
2518
2519 if self._concurrencychecker:
2519 if self._concurrencychecker:
2520 ifh, dfh = self._writinghandles
2520 ifh, dfh = self._writinghandles
2521 if self._inline:
2521 if self._inline:
2522 # offset is "as if" it were in the .d file, so we need to add on
2522 # offset is "as if" it were in the .d file, so we need to add on
2523 # the size of the entry metadata.
2523 # the size of the entry metadata.
2524 self._concurrencychecker(
2524 self._concurrencychecker(
2525 ifh, self._indexfile, offset + curr * self.index.entry_size
2525 ifh, self._indexfile, offset + curr * self.index.entry_size
2526 )
2526 )
2527 else:
2527 else:
2528 # Entries in the .i are a consistent size.
2528 # Entries in the .i are a consistent size.
2529 self._concurrencychecker(
2529 self._concurrencychecker(
2530 ifh, self._indexfile, curr * self.index.entry_size
2530 ifh, self._indexfile, curr * self.index.entry_size
2531 )
2531 )
2532 self._concurrencychecker(dfh, self._datafile, offset)
2532 self._concurrencychecker(dfh, self._datafile, offset)
2533
2533
2534 p1r, p2r = self.rev(p1), self.rev(p2)
2534 p1r, p2r = self.rev(p1), self.rev(p2)
2535
2535
2536 # full versions are inserted when the needed deltas
2536 # full versions are inserted when the needed deltas
2537 # become comparable to the uncompressed text
2537 # become comparable to the uncompressed text
2538 if rawtext is None:
2538 if rawtext is None:
2539 # need rawtext size, before changed by flag processors, which is
2539 # need rawtext size, before changed by flag processors, which is
2540 # the non-raw size. use revlog explicitly to avoid filelog's extra
2540 # the non-raw size. use revlog explicitly to avoid filelog's extra
2541 # logic that might remove metadata size.
2541 # logic that might remove metadata size.
2542 textlen = mdiff.patchedsize(
2542 textlen = mdiff.patchedsize(
2543 revlog.size(self, cachedelta[0]), cachedelta[1]
2543 revlog.size(self, cachedelta[0]), cachedelta[1]
2544 )
2544 )
2545 else:
2545 else:
2546 textlen = len(rawtext)
2546 textlen = len(rawtext)
2547
2547
2548 if deltacomputer is None:
2548 if deltacomputer is None:
2549 deltacomputer = deltautil.deltacomputer(self)
2549 deltacomputer = deltautil.deltacomputer(self)
2550
2550
2551 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2551 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2552
2552
2553 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2553 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2554
2554
2555 compression_mode = COMP_MODE_INLINE
2555 compression_mode = COMP_MODE_INLINE
2556 if self._docket is not None:
2556 if self._docket is not None:
2557 h, d = deltainfo.data
2557 h, d = deltainfo.data
2558 if not h and not d:
2558 if not h and not d:
2559 # not data to store at all... declare them uncompressed
2559 # not data to store at all... declare them uncompressed
2560 compression_mode = COMP_MODE_PLAIN
2560 compression_mode = COMP_MODE_PLAIN
2561 elif not h:
2561 elif not h:
2562 t = d[0:1]
2562 t = d[0:1]
2563 if t == b'\0':
2563 if t == b'\0':
2564 compression_mode = COMP_MODE_PLAIN
2564 compression_mode = COMP_MODE_PLAIN
2565 elif t == self._docket.default_compression_header:
2565 elif t == self._docket.default_compression_header:
2566 compression_mode = COMP_MODE_DEFAULT
2566 compression_mode = COMP_MODE_DEFAULT
2567 elif h == b'u':
2567 elif h == b'u':
2568 # we have a more efficient way to declare uncompressed
2568 # we have a more efficient way to declare uncompressed
2569 h = b''
2569 h = b''
2570 compression_mode = COMP_MODE_PLAIN
2570 compression_mode = COMP_MODE_PLAIN
2571 deltainfo = deltautil.drop_u_compression(deltainfo)
2571 deltainfo = deltautil.drop_u_compression(deltainfo)
2572
2572
2573 sidedata_compression_mode = COMP_MODE_INLINE
2573 sidedata_compression_mode = COMP_MODE_INLINE
2574 if sidedata and self.hassidedata:
2574 if sidedata and self.hassidedata:
2575 sidedata_compression_mode = COMP_MODE_PLAIN
2575 sidedata_compression_mode = COMP_MODE_PLAIN
2576 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2576 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2577 sidedata_offset = offset + deltainfo.deltalen
2577 sidedata_offset = offset + deltainfo.deltalen
2578 h, comp_sidedata = self.compress(serialized_sidedata)
2578 h, comp_sidedata = self.compress(serialized_sidedata)
2579 if (
2579 if (
2580 h != b'u'
2580 h != b'u'
2581 and comp_sidedata[0:1] != b'\0'
2581 and comp_sidedata[0:1] != b'\0'
2582 and len(comp_sidedata) < len(serialized_sidedata)
2582 and len(comp_sidedata) < len(serialized_sidedata)
2583 ):
2583 ):
2584 assert not h
2584 assert not h
2585 if (
2585 if (
2586 comp_sidedata[0:1]
2586 comp_sidedata[0:1]
2587 == self._docket.default_compression_header
2587 == self._docket.default_compression_header
2588 ):
2588 ):
2589 sidedata_compression_mode = COMP_MODE_DEFAULT
2589 sidedata_compression_mode = COMP_MODE_DEFAULT
2590 serialized_sidedata = comp_sidedata
2590 serialized_sidedata = comp_sidedata
2591 else:
2591 else:
2592 sidedata_compression_mode = COMP_MODE_INLINE
2592 sidedata_compression_mode = COMP_MODE_INLINE
2593 serialized_sidedata = comp_sidedata
2593 serialized_sidedata = comp_sidedata
2594 else:
2594 else:
2595 serialized_sidedata = b""
2595 serialized_sidedata = b""
2596 # Don't store the offset if the sidedata is empty, that way
2596 # Don't store the offset if the sidedata is empty, that way
2597 # we can easily detect empty sidedata and they will be no different
2597 # we can easily detect empty sidedata and they will be no different
2598 # than ones we manually add.
2598 # than ones we manually add.
2599 sidedata_offset = 0
2599 sidedata_offset = 0
2600
2600
2601 e = (
2601 e = (
2602 offset_type(offset, flags),
2602 offset_type(offset, flags),
2603 deltainfo.deltalen,
2603 deltainfo.deltalen,
2604 textlen,
2604 textlen,
2605 deltainfo.base,
2605 deltainfo.base,
2606 link,
2606 link,
2607 p1r,
2607 p1r,
2608 p2r,
2608 p2r,
2609 node,
2609 node,
2610 sidedata_offset,
2610 sidedata_offset,
2611 len(serialized_sidedata),
2611 len(serialized_sidedata),
2612 compression_mode,
2612 compression_mode,
2613 sidedata_compression_mode,
2613 sidedata_compression_mode,
2614 )
2614 )
2615
2615
2616 self.index.append(e)
2616 self.index.append(e)
2617 entry = self.index.entry_binary(curr)
2617 entry = self.index.entry_binary(curr)
2618 if curr == 0 and self._docket is None:
2618 if curr == 0 and self._docket is None:
2619 header = self._format_flags | self._format_version
2619 header = self._format_flags | self._format_version
2620 header = self.index.pack_header(header)
2620 header = self.index.pack_header(header)
2621 entry = header + entry
2621 entry = header + entry
2622 self._writeentry(
2622 self._writeentry(
2623 transaction,
2623 transaction,
2624 entry,
2624 entry,
2625 deltainfo.data,
2625 deltainfo.data,
2626 link,
2626 link,
2627 offset,
2627 offset,
2628 serialized_sidedata,
2628 serialized_sidedata,
2629 )
2629 )
2630
2630
2631 rawtext = btext[0]
2631 rawtext = btext[0]
2632
2632
2633 if alwayscache and rawtext is None:
2633 if alwayscache and rawtext is None:
2634 rawtext = deltacomputer.buildtext(revinfo, fh)
2634 rawtext = deltacomputer.buildtext(revinfo, fh)
2635
2635
2636 if type(rawtext) == bytes: # only accept immutable objects
2636 if type(rawtext) == bytes: # only accept immutable objects
2637 self._revisioncache = (node, curr, rawtext)
2637 self._revisioncache = (node, curr, rawtext)
2638 self._chainbasecache[curr] = deltainfo.chainbase
2638 self._chainbasecache[curr] = deltainfo.chainbase
2639 return curr
2639 return curr
2640
2640
def _get_data_offset(self, prev):
    """Return the current end offset of the (in-transaction) data file.

    Without a docket the answer is simply ``self.end(prev)``, i.e. the end
    of revision ``prev``, which is available in O(1).

    Revlogs that carry a docket cannot rely on that: sidedata may be
    rewritten at the end of the data file within a transaction, so rev
    ``n - 1`` can have its sidedata stored after rev ``n``'s data.  For
    them the docket's recorded ``data_end`` is returned instead.

    TODO cache this in a docket file before getting out of experimental.
    """
    docket = self._docket
    if docket is not None:
        return docket.data_end
    return self.end(prev)
2654
2654
def _writeentry(self, transaction, entry, data, link, offset, sidedata):
    """Write one revision to the open revlog file handle(s).

    ``entry`` is the packed index entry, ``data`` is a pair of byte strings
    written one after the other, and ``sidedata`` (if not empty) is written
    right after them.  ``offset`` is the position registered with
    ``transaction`` for the data (or, for inline revlogs, combined with
    the index size to register the index file position).

    Raises ``error.ProgrammingError`` when called outside of a
    ``revlog._writing`` context (no writing handles available).
    """
    # Files opened in a+ mode behave inconsistently across platforms.
    # Windows requires a file positioning call whenever a handle switches
    # between reading and writing (see 3686fa2b8eee and the
    # mixedfilemodewrapper in windows.py).  Elsewhere, Python or the
    # platform itself can be buggy: some versions of Solaris have been
    # observed not to append at the end of the file if the file was seeked
    # to before the end (see issue4943).
    #
    # The workaround is an explicit seek() before writing.  This is likely
    # unnecessary on Python 3, but since the handle is reused for reads and
    # may be seeked there, be careful before changing this.
    handles = self._writinghandles
    if handles is None:
        msg = b'adding revision outside `revlog._writing` context'
        raise error.ProgrammingError(msg)
    index_fh, data_fh = handles

    # Position the index handle, then the data handle (when there is one).
    if self._docket is not None:
        index_fh.seek(self._docket.index_end, os.SEEK_SET)
    else:
        index_fh.seek(0, os.SEEK_END)
    if data_fh:
        if self._docket is not None:
            data_fh.seek(self._docket.data_end, os.SEEK_SET)
        else:
            data_fh.seek(0, os.SEEK_END)

    rev = len(self) - 1
    if self._inline:
        # Inline revlog: entry and data are interleaved in the index file.
        offset += rev * self.index.entry_size
        transaction.add(self._indexfile, offset)
        index_fh.write(entry)
        index_fh.write(data[0])
        index_fh.write(data[1])
        if sidedata:
            index_fh.write(sidedata)
        self._enforceinlinesize(transaction)
    else:
        # Split revlog: data (and sidedata) go to the data file, the entry
        # goes to the index file.
        transaction.add(self._datafile, offset)
        transaction.add(self._indexfile, rev * len(entry))
        if data[0]:
            data_fh.write(data[0])
        data_fh.write(data[1])
        if sidedata:
            data_fh.write(sidedata)
        index_fh.write(entry)

    if self._docket is not None:
        # Record the new end of both files; the handles are re-read from
        # ``self`` on purpose rather than reusing the locals above.
        self._docket.index_end = self._writinghandles[0].tell()
        self._docket.data_end = self._writinghandles[1].tell()

    nodemaputil.setup_persistent_nodemap(transaction, self)
2706
2706
def addgroup(
    self,
    deltas,
    linkmapper,
    transaction,
    alwayscache=False,
    addrevisioncb=None,
    duplicaterevisioncb=None,
):
    """Add a group of deltas to the revision log.

    ``deltas`` yields 8-item tuples
    ``(node, p1, p2, linknode, deltabase, delta, flags, sidedata)``.  The
    first delta is against its parent, which should already be in this
    log; the rest are against the previous delta.

    ``linkmapper`` converts each ``linknode`` into the link revision to
    store.

    If ``addrevisioncb`` is given, it is called as
    ``addrevisioncb(self, rev)`` for every revision added.  If
    ``duplicaterevisioncb`` is given, it is called the same way for every
    node that was already present.

    Returns True if at least one item of ``deltas`` was processed (added
    or found to be a duplicate), False if ``deltas`` was empty.

    Raises ``error.ProgrammingError`` on nested calls,
    ``error.LookupError`` for an unknown parent or delta base, and
    ``error.CensoredBaseError`` when a delta against a censored base is
    not a full replacement.
    """
    if self._adding_group:
        raise error.ProgrammingError(b'cannot nest addgroup() calls')

    self._adding_group = True
    processed_any = False
    try:
        with self._writing(transaction):
            deltacomputer = deltautil.deltacomputer(self)
            for (
                node,
                p1,
                p2,
                linknode,
                deltabase,
                delta,
                flags,
                sidedata,
            ) in deltas:
                link = linkmapper(linknode)
                if not flags:
                    flags = REVIDX_DEFAULT_FLAGS

                known_rev = self.index.get_rev(node)
                if known_rev is not None:
                    # this can happen if two branches make the same change
                    self._nodeduplicatecallback(transaction, known_rev)
                    if duplicaterevisioncb:
                        duplicaterevisioncb(self, known_rev)
                    processed_any = True
                    continue

                for parent in (p1, p2):
                    if not self.index.has_node(parent):
                        raise error.LookupError(
                            parent, self.radix, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.display_id, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement
                    # in a single patch operation
                    header_size = struct.calcsize(b">lll")
                    expected_header = mdiff.replacediffheader(
                        self.rawsize(baserev), len(delta) - header_size
                    )
                    if delta[:header_size] != expected_header:
                        raise error.CensoredBaseError(
                            self.display_id, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta):
                    flags |= REVIDX_ISCENSORED

                # Consumers of addrevisioncb are assumed to want the added
                # revision back, which means a call to revision().  That
                # call fast-paths on a cache hit, hence ``alwayscache`` is
                # forwarded to _addrevision().  addgroup() is only used in
                # the context of changegroup generation, so the revision
                # data can always be handled as raw by the flagprocessor.
                new_rev = self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    alwayscache=alwayscache,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

                if addrevisioncb:
                    addrevisioncb(self, new_rev)
                processed_any = True
    finally:
        self._adding_group = False
    return processed_any
2815
2815
def iscensored(self, rev):
    """Tell whether revision ``rev`` is flagged as censored.

    Always False for revlogs that are not censorable; otherwise the
    ``REVIDX_ISCENSORED`` bit of the revision flags is returned.
    """
    if self._censorable:
        return self.flags(rev) & REVIDX_ISCENSORED
    return False
2822
2822
def _peek_iscensored(self, baserev, delta):
    """Cheaply check whether applying ``delta`` yields a censored revision.

    Always False for revlogs that are not censorable; otherwise the
    decision is delegated to ``storageutil.deltaiscensored``.
    """
    if self._censorable:
        return storageutil.deltaiscensored(delta, baserev, self.rawsize)
    return False
2829
2829
def getstrippoint(self, minlink):
    """Find the lowest revision to strip in order to strip ``minlink``.

    Returns a tuple holding that minimum rev and the set of all revs
    whose linkrevs will be broken by the strip.
    """
    tiprev = len(self) - 1
    heads = self.headrevs()
    return storageutil.resolvestripinfo(
        minlink, tiprev, heads, self.linkrev, self.parentrevs
    )
2843
2843
def strip(self, minlink, transaction):
    """Truncate the revlog at the first revision with linkrev >= minlink.

    Called when revision ``minlink`` and its descendants are being
    stripped from the repository.

    Every revision with a linkrev >= ``minlink`` has to go, because the
    matching changelog revisions get renumbered after the strip.  The
    revlog is therefore cut at the first such revision; the caller is
    trusted to have saved the revisions that must survive and to re-add
    them after the truncation.
    """
    if not len(self):
        return

    rev, _brokenrevs = self.getstrippoint(minlink)
    if rev == len(self):
        return

    # Step 1: register the truncation points of the on-disk files.
    data_end = self.start(rev)
    index_bytes = rev * self.index.entry_size
    if self._inline:
        # data and index entries share the index file
        end = data_end + index_bytes
    else:
        transaction.add(self._datafile, data_end)
        end = index_bytes

    transaction.add(self._indexfile, end)
    docket = self._docket
    if docket is not None:
        # XXX the docket could be leveraged while stripping, but it is not
        # powerful enough at the time of this comment
        docket.index_end = end
        docket.data_end = data_end
        docket.write(transaction, stripping=True)

    # Step 2: drop the in-memory state that refers to stripped revisions.
    self._revisioncache = None
    self._chaininfocache = util.lrucachedict(500)
    self._chunkclear()

    del self.index[rev:-1]
2887
2887
def checksize(self):
    """Check the size of the index and data files.

    Returns a ``(dd, di)`` tuple:

    - dd: extra bytes for the "data" file
    - di: extra bytes for the "index" file

    A healthy revlog returns ``(0, 0)``.  A file that does not exist
    (``ENOENT``) counts as having no extra bytes; any other ``IOError`` is
    propagated.
    """
    expected = 0
    if len(self):
        expected = max(0, self.end(len(self) - 1))

    try:
        with self._datafp() as f:
            f.seek(0, io.SEEK_END)
            actual = f.tell()
        dd = actual - expected
    except IOError as inst:
        if inst.errno != errno.ENOENT:
            raise
        dd = 0

    try:
        f = self.opener(self._indexfile)
        try:
            f.seek(0, io.SEEK_END)
            actual = f.tell()
        finally:
            # always release the handle, even when seek()/tell() fails
            f.close()
        s = self.index.entry_size
        i = max(0, actual // s)
        # bytes left over after the last complete index entry
        di = actual - (i * s)
        if self._inline:
            # inline revlogs keep revision data in the index file, so the
            # expected size is all entries plus all revision data
            databytes = 0
            for r in self:
                databytes += max(0, self.length(r))
            dd = 0
            di = actual - len(self) * s - databytes
    except IOError as inst:
        if inst.errno != errno.ENOENT:
            raise
        di = 0

    return (dd, di)
2931
2931
def files(self):
    """List the files backing this revlog.

    The index file is always listed; the data file is added only when the
    revlog is not inline.
    """
    if self._inline:
        return [self._indexfile]
    return [self._indexfile, self._datafile]
2937
2937
def emitrevisions(
    self,
    nodes,
    nodesorder=None,
    revisiondata=False,
    assumehaveparentrevisions=False,
    deltamode=repository.CG_DELTAMODE_STD,
    sidedata_helpers=None,
):
    """Emit revision deltas for ``nodes`` via ``storageutil.emitrevisions``.

    ``nodesorder`` must be ``b'nodes'``, ``b'storage'``, ``b'linear'`` or
    None, otherwise ``error.ProgrammingError`` is raised.  Two arguments
    are adjusted before delegating:

    - a None ``nodesorder`` becomes ``b'storage'`` on revlogs that do not
      use generaldelta;
    - when delta chains are not stored, any ``deltamode`` other than
      ``CG_DELTAMODE_PREV`` is replaced by ``CG_DELTAMODE_FULL``.
    """
    accepted_orders = (b'nodes', b'storage', b'linear', None)
    if nodesorder not in accepted_orders:
        raise error.ProgrammingError(
            b'unhandled value for nodesorder: %s' % nodesorder
        )

    if nodesorder is None and not self._generaldelta:
        nodesorder = b'storage'

    if not self._storedeltachains:
        if deltamode != repository.CG_DELTAMODE_PREV:
            deltamode = repository.CG_DELTAMODE_FULL

    return storageutil.emitrevisions(
        self,
        nodes,
        nodesorder,
        revlogrevisiondelta,
        deltaparentfn=self.deltaparent,
        candeltafn=self.candelta,
        rawsizefn=self.rawsize,
        revdifffn=self.revdiff,
        flagsfn=self.flags,
        deltamode=deltamode,
        revisiondata=revisiondata,
        assumehaveparentrevisions=assumehaveparentrevisions,
        sidedata_helpers=sidedata_helpers,
    )
2976
2976
# Accepted values for the ``deltareuse`` argument of ``clone`` (see its
# docstring for what each one means).
DELTAREUSEALWAYS = b'always'
DELTAREUSESAMEREVS = b'samerevs'
DELTAREUSENEVER = b'never'

DELTAREUSEFULLADD = b'fulladd'

DELTAREUSEALL = {
    DELTAREUSEALWAYS,
    DELTAREUSESAMEREVS,
    DELTAREUSENEVER,
    DELTAREUSEFULLADD,
}

def clone(
    self,
    tr,
    destrevlog,
    addrevisioncb=None,
    deltareuse=DELTAREUSESAMEREVS,
    forcedeltabothparents=None,
    sidedata_helpers=None,
):
    """Copy this revlog into ``destrevlog``, possibly changing its format.

    The destination ends up with the same revisions and nodes, but is not
    necessarily bit-for-bit identical (delta encoding may differ, for
    instance).

    ``deltareuse`` selects how deltas of the source revlog are carried
    over to the destination:

    DELTAREUSEALWAYS
      Reuse deltas whenever possible, even if the destination revlog
      would not have picked the same revisions for the delta.  Fastest
      mode of operation.
    DELTAREUSESAMEREVS
      Reuse a delta only if the destination revlog would pick the same
      revisions for it.  A balance between speed and optimization.
    DELTAREUSENEVER
      Never reuse deltas.  Slowest mode; useful to recompute every delta
      (e.g. if the diff/delta algorithm changes).
    DELTAREUSEFULLADD
      Re-add revisions as if they were new content.  Slower than
      DELTAREUSEALWAYS but lets more mechanisms kick in, e.g. large file
      detection and handling.

    Delta computation can be slow, so the reuse policy can significantly
    affect run time.

    The default (``DELTAREUSESAMEREVS``) sits between the extremes:
    appropriate deltas are reused, but a better base is chosen when one
    exists.  When converting a non-generaldelta revlog to a generaldelta
    one, this means deltas are recomputed if the delta's parent is not a
    parent of the revision.

    ``forcedeltabothparents`` controls whether deltas against both parents
    are force-computed for merges.  By default, the destination's current
    setting is used.

    See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
    `sidedata_helpers`.

    Raises ``ValueError`` for an unknown ``deltareuse`` value, a non-empty
    destination, or filtered revisions on either revlog.
    """
    if deltareuse not in self.DELTAREUSEALL:
        raise ValueError(
            _(b'value for deltareuse invalid: %s') % deltareuse
        )

    if len(destrevlog):
        raise ValueError(_(b'destination revlog is not empty'))

    if getattr(self, 'filteredrevs', None):
        raise ValueError(_(b'source revlog has filtered revisions'))
    if getattr(destrevlog, 'filteredrevs', None):
        raise ValueError(_(b'destination revlog has filtered revisions'))

    # (lazydeltabase, lazydelta) to apply per policy; these two settings
    # control whether a cached delta is reused, if possible.  Policies
    # missing from the table leave the destination settings untouched.
    lazy_settings = {
        self.DELTAREUSEALWAYS: (True, True),
        self.DELTAREUSESAMEREVS: (False, True),
        self.DELTAREUSENEVER: (False, False),
    }

    # Remember the destination settings so they can be put back afterwards.
    saved_lazydelta = destrevlog._lazydelta
    saved_lazydeltabase = destrevlog._lazydeltabase
    saved_bothparents = destrevlog._deltabothparents

    try:
        if deltareuse in lazy_settings:
            (
                destrevlog._lazydeltabase,
                destrevlog._lazydelta,
            ) = lazy_settings[deltareuse]

        destrevlog._deltabothparents = (
            forcedeltabothparents or saved_bothparents
        )

        self._clone(
            tr,
            destrevlog,
            addrevisioncb,
            deltareuse,
            forcedeltabothparents,
            sidedata_helpers,
        )

    finally:
        destrevlog._lazydelta = saved_lazydelta
        destrevlog._lazydeltabase = saved_lazydeltabase
        destrevlog._deltabothparents = saved_bothparents
3083
3083
def _clone(
    self,
    tr,
    destrevlog,
    addrevisioncb,
    deltareuse,
    forcedeltabothparents,
    sidedata_helpers,
):
    """perform the core duty of `revlog.clone` after parameter processing

    Every revision of ``self`` is copied, in iteration order, into
    ``destrevlog`` using transaction ``tr``.

    ``deltareuse`` selects how a revision is added:

    * ``DELTAREUSEFULLADD``: the result of ``self._revisiondata(rev)[0]``
      is handed to ``destrevlog.addrevision``.
    * anything else: when ``destrevlog._lazydelta`` is set and the
      revision has a delta parent, the stored chunk is forwarded as
      ``cachedelta``; otherwise the result of
      ``self._revisiondata(rev)[0]`` is passed as the raw text. The
      revision is then added through ``destrevlog._addrevision``.

    ``sidedata_helpers``, when not None, is run through
    ``sidedatautil.run_sidedata_helpers`` for every revision and the
    flags it returns are applied to the revision flags.

    ``addrevisioncb``, when truthy, is called as
    ``addrevisioncb(self, rev, node)`` after each revision is added.

    ``forcedeltabothparents`` is not read by this method.
    """
    deltacomputer = deltautil.deltacomputer(destrevlog)
    index = self.index
    fulladd = deltareuse == self.DELTAREUSEFULLADD
    for rev in self:
        entry = index[rev]

        # Some classes override linkrev to take filtered revs into
        # account. Use raw entry from index.
        flags = entry[0] & 0xFFFF
        linkrev = entry[4]
        p1 = index[entry[5]][7]
        p2 = index[entry[6]][7]
        node = entry[7]

        # (Possibly) reuse the delta from the revlog if allowed and
        # the revlog chunk is a delta.
        cachedelta = None
        rawtext = None
        text = None
        if fulladd:
            text = self._revisiondata(rev)[0]
        else:
            if destrevlog._lazydelta:
                dp = self.deltaparent(rev)
                if dp != nullrev:
                    cachedelta = (dp, bytes(self._chunk(rev)))

            if not cachedelta:
                rawtext = self._revisiondata(rev)[0]

        # The sidedata is always read through `self.sidedata`, after the
        # revision data, whichever strategy is used.
        sidedata = self.sidedata(rev)

        if sidedata_helpers is not None:
            (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                self, sidedata_helpers, sidedata, rev
            )
            # `new_flags` is (flags to add, flags to remove). The
            # parentheses matter: `&` binds tighter than `|`, so without
            # them the removal mask would only apply to the added flags
            # and never clear a flag the revision already carries.
            flags = (flags | new_flags[0]) & ~new_flags[1]

        if fulladd:
            destrevlog.addrevision(
                text,
                tr,
                linkrev,
                p1,
                p2,
                cachedelta=cachedelta,
                node=node,
                flags=flags,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )
        else:
            with destrevlog._writing(tr):
                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

        if addrevisioncb:
            addrevisioncb(self, rev, node)
3166
3168
def censorrevision(self, tr, censornode, tombstone=b''):
    """Replace the stored content of ``censornode`` with a tombstone.

    ``tombstone`` is wrapped with ``storageutil.packmeta`` under the
    ``b'censored'`` key. All revisions are copied into a temporary revlog
    (postfix ``tmpcensored``), with the censored revision replaced by the
    packed tombstone and flagged ``REVIDX_ISCENSORED``. The temporary
    files are then renamed over the current ones, after registering
    backups with ``tr``, and the index is reloaded.

    Raises ``error.RevlogError`` for ``REVLOGV0`` revlogs and
    ``error.Abort`` when the packed tombstone is longer than the raw size
    of the censored revision, or when a censored revision ends up (or is
    already) stored as a delta.
    """
    if self._format_version == REVLOGV0:
        raise error.RevlogError(
            _(b'cannot censor with version %d revlogs')
            % self._format_version
        )

    censorrev = self.rev(censornode)
    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    if len(tombstone) > self.rawsize(censorrev):
        raise error.Abort(
            _(b'censor tombstone must be no longer than censored data')
        )

    # Rewriting the revlog in place is hard. Our strategy for censoring is
    # to create a new revlog, copy all revisions to it, then replace the
    # revlogs on transaction close.
    #
    # This is a bit dangerous. We could easily have a mismatch of state.
    newrl = revlog(
        self.opener,
        target=self.target,
        radix=self.radix,
        postfix=b'tmpcensored',
        censorable=True,
    )
    # Make the temporary revlog use the same on-disk format as this one.
    newrl._format_version = self._format_version
    newrl._format_flags = self._format_flags
    newrl._generaldelta = self._generaldelta
    newrl._parse_index = self._parse_index

    for rev in self.revs():
        node = self.node(rev)
        p1, p2 = self.parents(node)

        if rev == censorrev:
            newrl.addrawrevision(
                tombstone,
                tr,
                self.linkrev(censorrev),
                p1,
                p2,
                censornode,
                REVIDX_ISCENSORED,
            )

            if newrl.deltaparent(rev) != nullrev:
                raise error.Abort(
                    _(
                        b'censored revision stored as delta; '
                        b'cannot censor'
                    ),
                    hint=_(
                        b'censoring of revlogs is not '
                        b'fully implemented; please report '
                        b'this bug'
                    ),
                )
            continue

        if self.iscensored(rev):
            if self.deltaparent(rev) != nullrev:
                raise error.Abort(
                    _(
                        b'cannot censor due to censored '
                        b'revision having delta stored'
                    )
                )
            # Already-censored revisions are copied from their stored
            # chunk instead of going through `rawdata`.
            rawtext = self._chunk(rev)
        else:
            rawtext = self.rawdata(rev)

        newrl.addrawrevision(
            rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
        )

    tr.addbackup(self._indexfile, location=b'store')
    if not self._inline:
        tr.addbackup(self._datafile, location=b'store')

    self.opener.rename(newrl._indexfile, self._indexfile)
    if not self._inline:
        self.opener.rename(newrl._datafile, self._datafile)

    self.clearcaches()
    self._loadindex()
3254
3256
def verifyintegrity(self, state):
    """Verifies the integrity of the revlog.

    Yields ``revlogproblem`` instances describing problems that are
    found.

    ``state`` is a dict shared with the caller. This method reads
    ``state[b'expectedversion']``, ``state[b'erroroncensored']`` and the
    optional ``state[b'skipflags']``. It resets ``state[b'skipread']``
    and ``state[b'safe_renamed']`` to empty sets, and adds to
    ``state[b'skipread']`` the nodes whose content could not be checked.
    """
    dd, di = self.checksize()
    if dd:
        yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
    if di:
        yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

    version = self._format_version

    # The verifier tells us what version revlog we should be.
    if version != state[b'expectedversion']:
        yield revlogproblem(
            warning=_(b"warning: '%s' uses revlog format %d; expected %d")
            % (self.display_id, version, state[b'expectedversion'])
        )

    state[b'skipread'] = set()
    state[b'safe_renamed'] = set()

    for rev in self:
        node = self.node(rev)

        # Verify contents. 4 cases to care about:
        #
        #   common: the most common case
        #   rename: with a rename
        #   meta: file content starts with b'\1\n', the metadata
        #         header defined in filelog.py, but without a rename
        #   ext: content stored externally
        #
        # More formally, their differences are shown below:
        #
        #                       | common | rename | meta  | ext
        #  -------------------------------------------------------
        #  flags()              | 0      | 0      | 0     | not 0
        #  renamed()            | False  | True   | False | ?
        #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
        #
        # "rawtext" means the raw text stored in revlog data, which
        # could be retrieved by "rawdata(rev)". "text"
        # mentioned below is "revision(rev)".
        #
        # There are 3 different lengths stored physically:
        #  1. L1: rawsize, stored in revlog index
        #  2. L2: len(rawtext), stored in revlog data
        #  3. L3: len(text), stored in revlog data if flags==0, or
        #     possibly somewhere else if flags!=0
        #
        # L1 should be equal to L2. L3 could be different from them.
        # "text" may or may not affect commit hash depending on flag
        # processors (see flagutil.addflagprocessor).
        #
        #              | common | rename | meta  | ext
        # -------------------------------------------------
        # rawsize()    | L1     | L1     | L1    | L1
        # size()       | L1     | L2-LM  | L1(*) | L1 (?)
        # len(rawtext) | L2     | L2     | L2    | L2
        # len(text)    | L2     | L2     | L2    | L3
        # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
        #
        # LM:  length of metadata, depending on rawtext
        # (*): not ideal, see comment in filelog.size
        # (?): could be "- len(meta)" if the resolved content has
        #      rename metadata
        #
        # Checks needed to be done:
        #  1. length check: L1 == L2, in all cases.
        #  2. hash check: depending on flag processor, we may need to
        #     use either "text" (external), or "rawtext" (in revlog).

        try:
            skipflags = state.get(b'skipflags', 0)
            if skipflags:
                # Only keep the skip flags this revision actually has.
                skipflags &= self.flags(rev)

            _verify_revision(self, skipflags, state, node)

            # Length check: L1 (index) against L2 (stored raw text).
            expected_size = self.rawsize(rev)
            actual_size = len(self.rawdata(node))

            if expected_size != actual_size:
                yield revlogproblem(
                    error=_(b'unpacked size is %d, %d expected')
                    % (actual_size, expected_size),
                    node=node,
                )

        except error.CensoredNodeError:
            if state[b'erroroncensored']:
                yield revlogproblem(
                    error=_(b'censored file data'), node=node
                )
                state[b'skipread'].add(node)
        except Exception as e:
            # Any other failure is reported as a problem for this node
            # rather than interrupting the verification.
            yield revlogproblem(
                error=_(b'unpacking %s: %s')
                % (short(node), stringutil.forcebytestr(e)),
                node=node,
            )
            state[b'skipread'].add(node)
3359
3361
def storageinfo(
    self,
    exclusivefiles=False,
    sharedfiles=False,
    revisionscount=False,
    trackedsize=False,
    storedsize=False,
):
    """Return a dict describing how this revlog is stored.

    Only the entries whose keyword argument is true are present:

    * ``b'exclusivefiles'``: list of ``(opener, path)`` pairs for the
      index file and, when the revlog is not inline, the data file.
    * ``b'sharedfiles'``: always an empty list.
    * ``b'revisionscount'``: ``len(self)``.
    * ``b'trackedsize'``: sum of ``self.rawsize(rev)`` over all revisions.
    * ``b'storedsize'``: sum of the on-disk sizes of ``self.files()``.
    """
    d = {}

    if exclusivefiles:
        exclusive = [(self.opener, self._indexfile)]
        if not self._inline:
            exclusive.append((self.opener, self._datafile))
        d[b'exclusivefiles'] = exclusive

    if sharedfiles:
        d[b'sharedfiles'] = []

    if revisionscount:
        d[b'revisionscount'] = len(self)

    if trackedsize:
        d[b'trackedsize'] = sum(self.rawsize(rev) for rev in self)

    if storedsize:
        d[b'storedsize'] = sum(
            self.opener.stat(path).st_size for path in self.files()
        )

    return d
3390
3392
def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
    """Compute and store sidedata for revisions ``startrev`` to ``endrev``.

    Both bounds are inclusive. For each revision, ``helpers`` is run
    through ``sidedatautil.run_sidedata_helpers``; the serialized result
    is appended to the data file and the index entry is updated with the
    new sidedata offset, length, flags and compression mode.

    Nothing is done when the revlog has no sidedata support, or when both
    ``helpers[1]`` and ``helpers[2]`` are empty.

    Raises ``error.Abort`` if a revision in the range already has
    sidedata recorded in its index entry.
    """
    if not self.hassidedata:
        return
    # revlog formats with sidedata support does not support inline
    assert not self._inline
    if not helpers[1] and not helpers[2]:
        # Nothing to generate or remove
        return

    new_entries = []
    # append the new sidedata
    with self._writing(transaction):
        ifh, dfh = self._writinghandles
        if self._docket is not None:
            dfh.seek(self._docket.data_end, os.SEEK_SET)
        else:
            dfh.seek(0, os.SEEK_END)

        current_offset = dfh.tell()
        for rev in range(startrev, endrev + 1):
            entry = self.index[rev]
            new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                store=self,
                sidedata_helpers=helpers,
                sidedata={},
                rev=rev,
            )

            serialized_sidedata = sidedatautil.serialize_sidedata(
                new_sidedata
            )

            sidedata_compression_mode = COMP_MODE_INLINE
            if serialized_sidedata and self.hassidedata:
                sidedata_compression_mode = COMP_MODE_PLAIN
                h, comp_sidedata = self.compress(serialized_sidedata)
                # Slice instead of indexing: on Python 3 `bytes[0]` is an
                # int and would never compare equal to a bytes value.
                comp_header = comp_sidedata[0:1]
                if (
                    h != b'u'
                    and comp_header != b'\0'
                    and len(comp_sidedata) < len(serialized_sidedata)
                ):
                    assert not h
                    serialized_sidedata = comp_sidedata
                    docket = self._docket
                    # Without a docket there is no default compression
                    # header to compare against, so keep the inline mode.
                    if (
                        docket is not None
                        and comp_header == docket.default_compression_header
                    ):
                        sidedata_compression_mode = COMP_MODE_DEFAULT
                    else:
                        sidedata_compression_mode = COMP_MODE_INLINE
            if entry[8] != 0 or entry[9] != 0:
                # rewriting entries that already have sidedata is not
                # supported yet, because it introduces garbage data in the
                # revlog.
                msg = b"rewriting existing sidedata is not supported yet"
                raise error.Abort(msg)

            # Apply (potential) flags to add and to remove after running
            # the sidedata helpers. The parentheses matter: `&` binds
            # tighter than `|`, so without them the removal mask would
            # never clear a flag already present in the entry.
            new_offset_flags = (entry[0] | flags[0]) & ~flags[1]
            entry_update = (
                current_offset,
                len(serialized_sidedata),
                new_offset_flags,
                sidedata_compression_mode,
            )

            # the sidedata computation might have moved the file cursors
            # around
            dfh.seek(current_offset, os.SEEK_SET)
            dfh.write(serialized_sidedata)
            new_entries.append(entry_update)
            current_offset += len(serialized_sidedata)
            if self._docket is not None:
                self._docket.data_end = dfh.tell()

        # rewrite the new index entries
        ifh.seek(startrev * self.index.entry_size)
        for i, e in enumerate(new_entries):
            rev = startrev + i
            self.index.replace_sidedata_info(rev, *e)
            packed = self.index.entry_binary(rev)
            if rev == 0 and self._docket is None:
                # Without a docket, the first index entry is written
                # with the packed format header in front of it.
                header = self._format_flags | self._format_version
                header = self.index.pack_header(header)
                packed = header + packed
            ifh.write(packed)
General Comments 0
You need to be logged in to leave comments. Login now