typing: make the revlog classes known to pytype...
Matt Harbison
r53102:bcaa5d40 default
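
The whole change applies one pattern: pytype cannot analyze a class hidden behind the `interfaceutil.implementer(...)` class decorator, so each attrs class gets a CamelCase definition, the interface wrapper is then applied as an ordinary call to produce the historical lowercase alias, and the alias is rebound to the bare class under `typing.TYPE_CHECKING` so static checkers resolve it to the real type. A minimal, self-contained sketch of the idea follows; the `implementer` stub and `IProblem` interface are stand-ins for illustration, not Mercurial's actual `interfaceutil`/`repository` APIs:

import typing


def implementer(iface):
    # Stand-in for interfaceutil.implementer: a real implementation would
    # register `iface` on the class; this one returns the class unchanged.
    def register(cls):
        return cls

    return register


class IProblem:
    """Hypothetical interface, used only for this sketch."""


class RevLogProblem:
    """The analyzable definition: pytype sees this class directly."""

    def __init__(self, warning=None, error=None, node=None):
        self.warning = warning
        self.error = error
        self.node = node


# Keep the historical lowercase name as a runtime alias; the wrapper
# call is opaque to pytype.
revlogproblem = implementer(IProblem)(RevLogProblem)

if typing.TYPE_CHECKING:
    # Rebind so static analysis resolves the alias to the plain class.
    revlogproblem = RevLogProblem
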
@@ -1,4109 +1,4123 @@
 # revlog.py - storage back-end for mercurial
 # coding: utf8
 #
 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 """Storage back-end for Mercurial.
 
 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches.
 """
 
 
 import binascii
 import collections
 import contextlib
 import functools
 import io
 import os
 import struct
 import typing
 import weakref
 import zlib
 
 from typing import (
     Optional,
     Tuple,
 )
 
 # import stuff from node for others to import from revlog
 from .node import (
     bin,
     hex,
     nullrev,
     sha1nodeconstants,
     short,
     wdirrev,
 )
 from .i18n import _
 from .revlogutils.constants import (
     ALL_KINDS,
     CHANGELOGV2,
     COMP_MODE_DEFAULT,
     COMP_MODE_INLINE,
     COMP_MODE_PLAIN,
     DELTA_BASE_REUSE_NO,
     DELTA_BASE_REUSE_TRY,
     ENTRY_RANK,
     FEATURES_BY_VERSION,
     FLAG_GENERALDELTA,
     FLAG_INLINE_DATA,
     INDEX_HEADER,
     KIND_CHANGELOG,
     KIND_FILELOG,
     RANK_UNKNOWN,
     REVLOGV0,
     REVLOGV1,
     REVLOGV1_FLAGS,
     REVLOGV2,
     REVLOGV2_FLAGS,
     REVLOG_DEFAULT_FLAGS,
     REVLOG_DEFAULT_FORMAT,
     REVLOG_DEFAULT_VERSION,
     SUPPORTED_FLAGS,
 )
 from .revlogutils.flagutil import (
     REVIDX_DEFAULT_FLAGS,
     REVIDX_ELLIPSIS,
     REVIDX_EXTSTORED,
     REVIDX_FLAGS_ORDER,
     REVIDX_HASCOPIESINFO,
     REVIDX_ISCENSORED,
     REVIDX_RAWTEXT_CHANGING_FLAGS,
 )
 from .thirdparty import attr
 
 # Force pytype to use the non-vendored package
 if typing.TYPE_CHECKING:
     # noinspection PyPackageRequirements
     import attr
 
 from . import (
     ancestor,
     dagop,
     error,
     mdiff,
     policy,
     pycompat,
     revlogutils,
     templatefilters,
     util,
     vfs as vfsmod,
 )
 from .interfaces import (
     repository,
     util as interfaceutil,
 )
 from .revlogutils import (
     deltas as deltautil,
     docket as docketutil,
     flagutil,
     nodemap as nodemaputil,
     randomaccessfile,
     revlogv0,
     rewrite,
     sidedata as sidedatautil,
 )
 from .utils import (
     storageutil,
     stringutil,
 )
 
 # blanked usage of all the name to prevent pyflakes constraints
 # We need these name available in the module for extensions.
 
 REVLOGV0
 REVLOGV1
 REVLOGV2
 CHANGELOGV2
 FLAG_INLINE_DATA
 FLAG_GENERALDELTA
 REVLOG_DEFAULT_FLAGS
 REVLOG_DEFAULT_FORMAT
 REVLOG_DEFAULT_VERSION
 REVLOGV1_FLAGS
 REVLOGV2_FLAGS
 REVIDX_ISCENSORED
 REVIDX_ELLIPSIS
 REVIDX_HASCOPIESINFO
 REVIDX_EXTSTORED
 REVIDX_DEFAULT_FLAGS
 REVIDX_FLAGS_ORDER
 REVIDX_RAWTEXT_CHANGING_FLAGS
 
 parsers = policy.importmod('parsers')
 rustancestor = policy.importrust('ancestor')
 rustdagop = policy.importrust('dagop')
 rustrevlog = policy.importrust('revlog')
 
 # Aliased for performance.
 _zlibdecompress = zlib.decompress
 
 # max size of inline data embedded into a revlog
 _maxinline = 131072
 
 
 # Flag processors for REVIDX_ELLIPSIS.
 def ellipsisreadprocessor(rl, text):
     return text, False
 
 
 def ellipsiswriteprocessor(rl, text):
     return text, False
 
 
 def ellipsisrawprocessor(rl, text):
     return False
 
 
 ellipsisprocessor = (
     ellipsisreadprocessor,
     ellipsiswriteprocessor,
     ellipsisrawprocessor,
 )
 
 
 def _verify_revision(rl, skipflags, state, node):
     """Verify the integrity of the given revlog ``node`` while providing a hook
     point for extensions to influence the operation."""
     if skipflags:
         state[b'skipread'].add(node)
     else:
         # Side-effect: read content and verify hash.
         rl.revision(node)
 
 
 # True if a fast implementation for persistent-nodemap is available
 #
 # We also consider we have a "fast" implementation in "pure" python because
 # people using pure don't really have performance consideration (and a
 # wheelbarrow of other slowness source)
 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
     parsers, 'BaseIndexObject'
 )
 
 
-@interfaceutil.implementer(repository.irevisiondelta)
 @attr.s(slots=True)
-class revlogrevisiondelta:
+class RevLogRevisionDelta:
     node = attr.ib()
     p1node = attr.ib()
     p2node = attr.ib()
     basenode = attr.ib()
     flags = attr.ib()
     baserevisionsize = attr.ib()
     revision = attr.ib()
     delta = attr.ib()
     sidedata = attr.ib()
     protocol_flags = attr.ib()
     linknode = attr.ib(default=None)
 
 
-@interfaceutil.implementer(repository.iverifyproblem)
+revlogrevisiondelta = interfaceutil.implementer(repository.irevisiondelta)(
+    RevLogRevisionDelta
+)
+
+if typing.TYPE_CHECKING:
+    revlogrevisiondelta = RevLogRevisionDelta
+
+
 @attr.s(frozen=True)
-class revlogproblem:
+class RevLogProblem:
     warning = attr.ib(default=None)
     error = attr.ib(default=None)
     node = attr.ib(default=None)
 
 
+revlogproblem = interfaceutil.implementer(repository.iverifyproblem)(
+    RevLogProblem
+)
+
+if typing.TYPE_CHECKING:
+    revlogproblem = RevLogProblem
+
+
213 def parse_index_v1(data, inline):
227 def parse_index_v1(data, inline):
214 # call the C implementation to parse the index data
228 # call the C implementation to parse the index data
215 index, cache = parsers.parse_index2(data, inline)
229 index, cache = parsers.parse_index2(data, inline)
216 return index, cache
230 return index, cache
217
231
218
232
219 def parse_index_v2(data, inline):
233 def parse_index_v2(data, inline):
220 # call the C implementation to parse the index data
234 # call the C implementation to parse the index data
221 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
235 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
222 return index, cache
236 return index, cache
223
237
224
238
225 def parse_index_cl_v2(data, inline):
239 def parse_index_cl_v2(data, inline):
226 # call the C implementation to parse the index data
240 # call the C implementation to parse the index data
227 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
241 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
228 return index, cache
242 return index, cache
229
243
230
244
231 if hasattr(parsers, 'parse_index_devel_nodemap'):
245 if hasattr(parsers, 'parse_index_devel_nodemap'):
232
246
233 def parse_index_v1_nodemap(data, inline):
247 def parse_index_v1_nodemap(data, inline):
234 index, cache = parsers.parse_index_devel_nodemap(data, inline)
248 index, cache = parsers.parse_index_devel_nodemap(data, inline)
235 return index, cache
249 return index, cache
236
250
237 else:
251 else:
238 parse_index_v1_nodemap = None
252 parse_index_v1_nodemap = None
239
253
240
254
241 def parse_index_v1_rust(data, inline, default_header):
255 def parse_index_v1_rust(data, inline, default_header):
242 cache = (0, data) if inline else None
256 cache = (0, data) if inline else None
243 return rustrevlog.Index(data, default_header), cache
257 return rustrevlog.Index(data, default_header), cache
244
258
245
259
246 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
260 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
247 # signed integer)
261 # signed integer)
248 _maxentrysize = 0x7FFFFFFF
262 _maxentrysize = 0x7FFFFFFF
249
263
250 FILE_TOO_SHORT_MSG = _(
264 FILE_TOO_SHORT_MSG = _(
251 b'cannot read from revlog %s;'
265 b'cannot read from revlog %s;'
252 b' expected %d bytes from offset %d, data size is %d'
266 b' expected %d bytes from offset %d, data size is %d'
253 )
267 )
254
268
255 hexdigits = b'0123456789abcdefABCDEF'
269 hexdigits = b'0123456789abcdefABCDEF'
256
270
257
271
258 class _Config:
272 class _Config:
259 def copy(self):
273 def copy(self):
260 return self.__class__(**self.__dict__)
274 return self.__class__(**self.__dict__)
261
275
262
276
263 @attr.s()
277 @attr.s()
264 class FeatureConfig(_Config):
278 class FeatureConfig(_Config):
265 """Hold configuration values about the available revlog features"""
279 """Hold configuration values about the available revlog features"""
266
280
267 # the default compression engine
281 # the default compression engine
268 compression_engine = attr.ib(default=b'zlib')
282 compression_engine = attr.ib(default=b'zlib')
269 # compression engines options
283 # compression engines options
270 compression_engine_options = attr.ib(default=attr.Factory(dict))
284 compression_engine_options = attr.ib(default=attr.Factory(dict))
271
285
272 # can we use censor on this revlog
286 # can we use censor on this revlog
273 censorable = attr.ib(default=False)
287 censorable = attr.ib(default=False)
274 # does this revlog use the "side data" feature
288 # does this revlog use the "side data" feature
275 has_side_data = attr.ib(default=False)
289 has_side_data = attr.ib(default=False)
276 # might remove rank configuration once the computation has no impact
290 # might remove rank configuration once the computation has no impact
277 compute_rank = attr.ib(default=False)
291 compute_rank = attr.ib(default=False)
278 # parent order is supposed to be semantically irrelevant, so we
292 # parent order is supposed to be semantically irrelevant, so we
279 # normally resort parents to ensure that the first parent is non-null,
293 # normally resort parents to ensure that the first parent is non-null,
280 # if there is a non-null parent at all.
294 # if there is a non-null parent at all.
281 # filelog abuses the parent order as flag to mark some instances of
295 # filelog abuses the parent order as flag to mark some instances of
282 # meta-encoded files, so allow it to disable this behavior.
296 # meta-encoded files, so allow it to disable this behavior.
283 canonical_parent_order = attr.ib(default=False)
297 canonical_parent_order = attr.ib(default=False)
284 # can ellipsis commit be used
298 # can ellipsis commit be used
285 enable_ellipsis = attr.ib(default=False)
299 enable_ellipsis = attr.ib(default=False)
286
300
287 def copy(self):
301 def copy(self):
288 new = super().copy()
302 new = super().copy()
289 new.compression_engine_options = self.compression_engine_options.copy()
303 new.compression_engine_options = self.compression_engine_options.copy()
290 return new
304 return new
291
305
292
306
293 @attr.s()
307 @attr.s()
294 class DataConfig(_Config):
308 class DataConfig(_Config):
295 """Hold configuration value about how the revlog data are read"""
309 """Hold configuration value about how the revlog data are read"""
296
310
297 # should we try to open the "pending" version of the revlog
311 # should we try to open the "pending" version of the revlog
298 try_pending = attr.ib(default=False)
312 try_pending = attr.ib(default=False)
299 # should we try to open the "splitted" version of the revlog
313 # should we try to open the "splitted" version of the revlog
300 try_split = attr.ib(default=False)
314 try_split = attr.ib(default=False)
301 # When True, indexfile should be opened with checkambig=True at writing,
315 # When True, indexfile should be opened with checkambig=True at writing,
302 # to avoid file stat ambiguity.
316 # to avoid file stat ambiguity.
303 check_ambig = attr.ib(default=False)
317 check_ambig = attr.ib(default=False)
304
318
305 # If true, use mmap instead of reading to deal with large index
319 # If true, use mmap instead of reading to deal with large index
306 mmap_large_index = attr.ib(default=False)
320 mmap_large_index = attr.ib(default=False)
307 # how much data is large
321 # how much data is large
308 mmap_index_threshold = attr.ib(default=None)
322 mmap_index_threshold = attr.ib(default=None)
309 # How much data to read and cache into the raw revlog data cache.
323 # How much data to read and cache into the raw revlog data cache.
310 chunk_cache_size = attr.ib(default=65536)
324 chunk_cache_size = attr.ib(default=65536)
311
325
312 # The size of the uncompressed cache compared to the largest revision seen.
326 # The size of the uncompressed cache compared to the largest revision seen.
313 uncompressed_cache_factor = attr.ib(default=None)
327 uncompressed_cache_factor = attr.ib(default=None)
314
328
315 # The number of chunk cached
329 # The number of chunk cached
316 uncompressed_cache_count = attr.ib(default=None)
330 uncompressed_cache_count = attr.ib(default=None)
317
331
318 # Allow sparse reading of the revlog data
332 # Allow sparse reading of the revlog data
319 with_sparse_read = attr.ib(default=False)
333 with_sparse_read = attr.ib(default=False)
320 # minimal density of a sparse read chunk
334 # minimal density of a sparse read chunk
321 sr_density_threshold = attr.ib(default=0.50)
335 sr_density_threshold = attr.ib(default=0.50)
322 # minimal size of data we skip when performing sparse read
336 # minimal size of data we skip when performing sparse read
323 sr_min_gap_size = attr.ib(default=262144)
337 sr_min_gap_size = attr.ib(default=262144)
324
338
325 # are delta encoded against arbitrary bases.
339 # are delta encoded against arbitrary bases.
326 generaldelta = attr.ib(default=False)
340 generaldelta = attr.ib(default=False)
327
341
328
342
329 @attr.s()
343 @attr.s()
330 class DeltaConfig(_Config):
344 class DeltaConfig(_Config):
331 """Hold configuration value about how new delta are computed
345 """Hold configuration value about how new delta are computed
332
346
333 Some attributes are duplicated from DataConfig to help havign each object
347 Some attributes are duplicated from DataConfig to help havign each object
334 self contained.
348 self contained.
335 """
349 """
336
350
337 # can delta be encoded against arbitrary bases.
351 # can delta be encoded against arbitrary bases.
338 general_delta = attr.ib(default=False)
352 general_delta = attr.ib(default=False)
339 # Allow sparse writing of the revlog data
353 # Allow sparse writing of the revlog data
340 sparse_revlog = attr.ib(default=False)
354 sparse_revlog = attr.ib(default=False)
341 # maximum length of a delta chain
355 # maximum length of a delta chain
342 max_chain_len = attr.ib(default=None)
356 max_chain_len = attr.ib(default=None)
343 # Maximum distance between delta chain base start and end
357 # Maximum distance between delta chain base start and end
344 max_deltachain_span = attr.ib(default=-1)
358 max_deltachain_span = attr.ib(default=-1)
345 # If `upper_bound_comp` is not None, this is the expected maximal gain from
359 # If `upper_bound_comp` is not None, this is the expected maximal gain from
346 # compression for the data content.
360 # compression for the data content.
347 upper_bound_comp = attr.ib(default=None)
361 upper_bound_comp = attr.ib(default=None)
348 # Should we try a delta against both parent
362 # Should we try a delta against both parent
349 delta_both_parents = attr.ib(default=True)
363 delta_both_parents = attr.ib(default=True)
350 # Test delta base candidate group by chunk of this maximal size.
364 # Test delta base candidate group by chunk of this maximal size.
351 candidate_group_chunk_size = attr.ib(default=0)
365 candidate_group_chunk_size = attr.ib(default=0)
352 # Should we display debug information about delta computation
366 # Should we display debug information about delta computation
353 debug_delta = attr.ib(default=False)
367 debug_delta = attr.ib(default=False)
354 # trust incoming delta by default
368 # trust incoming delta by default
355 lazy_delta = attr.ib(default=True)
369 lazy_delta = attr.ib(default=True)
356 # trust the base of incoming delta by default
370 # trust the base of incoming delta by default
357 lazy_delta_base = attr.ib(default=False)
371 lazy_delta_base = attr.ib(default=False)
358
372
359
373
360 class _InnerRevlog:
374 class _InnerRevlog:
361 """An inner layer of the revlog object
375 """An inner layer of the revlog object
362
376
363 That layer exist to be able to delegate some operation to Rust, its
377 That layer exist to be able to delegate some operation to Rust, its
364 boundaries are arbitrary and based on what we can delegate to Rust.
378 boundaries are arbitrary and based on what we can delegate to Rust.
365 """
379 """
366
380
367 opener: vfsmod.vfs
381 opener: vfsmod.vfs
368
382
369 def __init__(
383 def __init__(
370 self,
384 self,
371 opener: vfsmod.vfs,
385 opener: vfsmod.vfs,
372 index,
386 index,
373 index_file,
387 index_file,
374 data_file,
388 data_file,
375 sidedata_file,
389 sidedata_file,
376 inline,
390 inline,
377 data_config,
391 data_config,
378 delta_config,
392 delta_config,
379 feature_config,
393 feature_config,
380 chunk_cache,
394 chunk_cache,
381 default_compression_header,
395 default_compression_header,
382 ):
396 ):
383 self.opener = opener
397 self.opener = opener
384 self.index = index
398 self.index = index
385
399
386 self.index_file = index_file
400 self.index_file = index_file
387 self.data_file = data_file
401 self.data_file = data_file
388 self.sidedata_file = sidedata_file
402 self.sidedata_file = sidedata_file
389 self.inline = inline
403 self.inline = inline
390 self.data_config = data_config
404 self.data_config = data_config
391 self.delta_config = delta_config
405 self.delta_config = delta_config
392 self.feature_config = feature_config
406 self.feature_config = feature_config
393
407
394 # used during diverted write.
408 # used during diverted write.
395 self._orig_index_file = None
409 self._orig_index_file = None
396
410
397 self._default_compression_header = default_compression_header
411 self._default_compression_header = default_compression_header
398
412
399 # index
413 # index
400
414
401 # 3-tuple of file handles being used for active writing.
415 # 3-tuple of file handles being used for active writing.
402 self._writinghandles = None
416 self._writinghandles = None
403
417
404 self._segmentfile = randomaccessfile.randomaccessfile(
418 self._segmentfile = randomaccessfile.randomaccessfile(
405 self.opener,
419 self.opener,
406 (self.index_file if self.inline else self.data_file),
420 (self.index_file if self.inline else self.data_file),
407 self.data_config.chunk_cache_size,
421 self.data_config.chunk_cache_size,
408 chunk_cache,
422 chunk_cache,
409 )
423 )
410 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
424 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
411 self.opener,
425 self.opener,
412 self.sidedata_file,
426 self.sidedata_file,
413 self.data_config.chunk_cache_size,
427 self.data_config.chunk_cache_size,
414 )
428 )
415
429
416 # revlog header -> revlog compressor
430 # revlog header -> revlog compressor
417 self._decompressors = {}
431 self._decompressors = {}
418 # 3-tuple of (node, rev, text) for a raw revision.
432 # 3-tuple of (node, rev, text) for a raw revision.
419 self._revisioncache = None
433 self._revisioncache = None
420
434
421 # cache some uncompressed chunks
435 # cache some uncompressed chunks
422 # rev β†’ uncompressed_chunk
436 # rev β†’ uncompressed_chunk
423 #
437 #
424 # the max cost is dynamically updated to be proportionnal to the
438 # the max cost is dynamically updated to be proportionnal to the
425 # size of revision we actually encounter.
439 # size of revision we actually encounter.
426 self._uncompressed_chunk_cache = None
440 self._uncompressed_chunk_cache = None
427 if self.data_config.uncompressed_cache_factor is not None:
441 if self.data_config.uncompressed_cache_factor is not None:
428 self._uncompressed_chunk_cache = util.lrucachedict(
442 self._uncompressed_chunk_cache = util.lrucachedict(
429 self.data_config.uncompressed_cache_count,
443 self.data_config.uncompressed_cache_count,
430 maxcost=65536, # some arbitrary initial value
444 maxcost=65536, # some arbitrary initial value
431 )
445 )
432
446
433 self._delay_buffer = None
447 self._delay_buffer = None
434
448
435 def __len__(self):
449 def __len__(self):
436 return len(self.index)
450 return len(self.index)
437
451
438 def clear_cache(self):
452 def clear_cache(self):
439 assert not self.is_delaying
453 assert not self.is_delaying
440 self._revisioncache = None
454 self._revisioncache = None
441 if self._uncompressed_chunk_cache is not None:
455 if self._uncompressed_chunk_cache is not None:
442 self._uncompressed_chunk_cache.clear()
456 self._uncompressed_chunk_cache.clear()
443 self._segmentfile.clear_cache()
457 self._segmentfile.clear_cache()
444 self._segmentfile_sidedata.clear_cache()
458 self._segmentfile_sidedata.clear_cache()
445
459
446 @property
460 @property
447 def canonical_index_file(self):
461 def canonical_index_file(self):
448 if self._orig_index_file is not None:
462 if self._orig_index_file is not None:
449 return self._orig_index_file
463 return self._orig_index_file
450 return self.index_file
464 return self.index_file
451
465
452 @property
466 @property
453 def is_delaying(self):
467 def is_delaying(self):
454 """is the revlog is currently delaying the visibility of written data?
468 """is the revlog is currently delaying the visibility of written data?
455
469
456 The delaying mechanism can be either in-memory or written on disk in a
470 The delaying mechanism can be either in-memory or written on disk in a
457 side-file."""
471 side-file."""
458 return (self._delay_buffer is not None) or (
472 return (self._delay_buffer is not None) or (
459 self._orig_index_file is not None
473 self._orig_index_file is not None
460 )
474 )
461
475
462 # Derived from index values.
476 # Derived from index values.
463
477
464 def start(self, rev):
478 def start(self, rev):
465 """the offset of the data chunk for this revision"""
479 """the offset of the data chunk for this revision"""
466 return int(self.index[rev][0] >> 16)
480 return int(self.index[rev][0] >> 16)
467
481
468 def length(self, rev):
482 def length(self, rev):
469 """the length of the data chunk for this revision"""
483 """the length of the data chunk for this revision"""
470 return self.index[rev][1]
484 return self.index[rev][1]
471
485
472 def end(self, rev):
486 def end(self, rev):
473 """the end of the data chunk for this revision"""
487 """the end of the data chunk for this revision"""
474 return self.start(rev) + self.length(rev)
488 return self.start(rev) + self.length(rev)
475
489
476 def deltaparent(self, rev):
490 def deltaparent(self, rev):
477 """return deltaparent of the given revision"""
491 """return deltaparent of the given revision"""
478 base = self.index[rev][3]
492 base = self.index[rev][3]
479 if base == rev:
493 if base == rev:
480 return nullrev
494 return nullrev
481 elif self.delta_config.general_delta:
495 elif self.delta_config.general_delta:
482 return base
496 return base
483 else:
497 else:
484 return rev - 1
498 return rev - 1
485
499
486 def issnapshot(self, rev):
500 def issnapshot(self, rev):
487 """tells whether rev is a snapshot"""
501 """tells whether rev is a snapshot"""
488 if not self.delta_config.sparse_revlog:
502 if not self.delta_config.sparse_revlog:
489 return self.deltaparent(rev) == nullrev
503 return self.deltaparent(rev) == nullrev
490 elif hasattr(self.index, 'issnapshot'):
504 elif hasattr(self.index, 'issnapshot'):
491 # directly assign the method to cache the testing and access
505 # directly assign the method to cache the testing and access
492 self.issnapshot = self.index.issnapshot
506 self.issnapshot = self.index.issnapshot
493 return self.issnapshot(rev)
507 return self.issnapshot(rev)
494 if rev == nullrev:
508 if rev == nullrev:
495 return True
509 return True
496 entry = self.index[rev]
510 entry = self.index[rev]
497 base = entry[3]
511 base = entry[3]
498 if base == rev:
512 if base == rev:
499 return True
513 return True
500 if base == nullrev:
514 if base == nullrev:
501 return True
515 return True
502 p1 = entry[5]
516 p1 = entry[5]
503 while self.length(p1) == 0:
517 while self.length(p1) == 0:
504 b = self.deltaparent(p1)
518 b = self.deltaparent(p1)
505 if b == p1:
519 if b == p1:
506 break
520 break
507 p1 = b
521 p1 = b
508 p2 = entry[6]
522 p2 = entry[6]
509 while self.length(p2) == 0:
523 while self.length(p2) == 0:
510 b = self.deltaparent(p2)
524 b = self.deltaparent(p2)
511 if b == p2:
525 if b == p2:
512 break
526 break
513 p2 = b
527 p2 = b
514 if base == p1 or base == p2:
528 if base == p1 or base == p2:
515 return False
529 return False
516 return self.issnapshot(base)
530 return self.issnapshot(base)
517
531
518 def _deltachain(self, rev, stoprev=None):
532 def _deltachain(self, rev, stoprev=None):
519 """Obtain the delta chain for a revision.
533 """Obtain the delta chain for a revision.
520
534
521 ``stoprev`` specifies a revision to stop at. If not specified, we
535 ``stoprev`` specifies a revision to stop at. If not specified, we
522 stop at the base of the chain.
536 stop at the base of the chain.
523
537
524 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
538 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
525 revs in ascending order and ``stopped`` is a bool indicating whether
539 revs in ascending order and ``stopped`` is a bool indicating whether
526 ``stoprev`` was hit.
540 ``stoprev`` was hit.
527 """
541 """
528 generaldelta = self.delta_config.general_delta
542 generaldelta = self.delta_config.general_delta
529 # Try C implementation.
543 # Try C implementation.
530 try:
544 try:
531 return self.index.deltachain(rev, stoprev, generaldelta)
545 return self.index.deltachain(rev, stoprev, generaldelta)
532 except AttributeError:
546 except AttributeError:
533 pass
547 pass
534
548
535 chain = []
549 chain = []
536
550
537 # Alias to prevent attribute lookup in tight loop.
551 # Alias to prevent attribute lookup in tight loop.
538 index = self.index
552 index = self.index
539
553
540 iterrev = rev
554 iterrev = rev
541 e = index[iterrev]
555 e = index[iterrev]
542 while iterrev != e[3] and iterrev != stoprev:
556 while iterrev != e[3] and iterrev != stoprev:
543 chain.append(iterrev)
557 chain.append(iterrev)
544 if generaldelta:
558 if generaldelta:
545 iterrev = e[3]
559 iterrev = e[3]
546 else:
560 else:
547 iterrev -= 1
561 iterrev -= 1
548 e = index[iterrev]
562 e = index[iterrev]
549
563
550 if iterrev == stoprev:
564 if iterrev == stoprev:
551 stopped = True
565 stopped = True
552 else:
566 else:
553 chain.append(iterrev)
567 chain.append(iterrev)
554 stopped = False
568 stopped = False
555
569
556 chain.reverse()
570 chain.reverse()
557 return chain, stopped
571 return chain, stopped
558
572
559 @util.propertycache
573 @util.propertycache
560 def _compressor(self):
574 def _compressor(self):
561 engine = util.compengines[self.feature_config.compression_engine]
575 engine = util.compengines[self.feature_config.compression_engine]
562 return engine.revlogcompressor(
576 return engine.revlogcompressor(
563 self.feature_config.compression_engine_options
577 self.feature_config.compression_engine_options
564 )
578 )
565
579
566 @util.propertycache
580 @util.propertycache
567 def _decompressor(self):
581 def _decompressor(self):
568 """the default decompressor"""
582 """the default decompressor"""
569 if self._default_compression_header is None:
583 if self._default_compression_header is None:
570 return None
584 return None
571 t = self._default_compression_header
585 t = self._default_compression_header
572 c = self._get_decompressor(t)
586 c = self._get_decompressor(t)
573 return c.decompress
587 return c.decompress
574
588
575 def _get_decompressor(self, t: bytes):
589 def _get_decompressor(self, t: bytes):
576 try:
590 try:
577 compressor = self._decompressors[t]
591 compressor = self._decompressors[t]
578 except KeyError:
592 except KeyError:
579 try:
593 try:
580 engine = util.compengines.forrevlogheader(t)
594 engine = util.compengines.forrevlogheader(t)
581 compressor = engine.revlogcompressor(
595 compressor = engine.revlogcompressor(
582 self.feature_config.compression_engine_options
596 self.feature_config.compression_engine_options
583 )
597 )
584 self._decompressors[t] = compressor
598 self._decompressors[t] = compressor
585 except KeyError:
599 except KeyError:
586 raise error.RevlogError(
600 raise error.RevlogError(
587 _(b'unknown compression type %s') % binascii.hexlify(t)
601 _(b'unknown compression type %s') % binascii.hexlify(t)
588 )
602 )
589 return compressor
603 return compressor
590
604
591 def compress(self, data: bytes) -> Tuple[bytes, bytes]:
605 def compress(self, data: bytes) -> Tuple[bytes, bytes]:
592 """Generate a possibly-compressed representation of data."""
606 """Generate a possibly-compressed representation of data."""
593 if not data:
607 if not data:
594 return b'', data
608 return b'', data
595
609
596 compressed = self._compressor.compress(data)
610 compressed = self._compressor.compress(data)
597
611
598 if compressed:
612 if compressed:
599 # The revlog compressor added the header in the returned data.
613 # The revlog compressor added the header in the returned data.
600 return b'', compressed
614 return b'', compressed
601
615
602 if data[0:1] == b'\0':
616 if data[0:1] == b'\0':
603 return b'', data
617 return b'', data
604 return b'u', data
618 return b'u', data
605
619
606 def decompress(self, data: bytes):
620 def decompress(self, data: bytes):
607 """Decompress a revlog chunk.
621 """Decompress a revlog chunk.
608
622
609 The chunk is expected to begin with a header identifying the
623 The chunk is expected to begin with a header identifying the
610 format type so it can be routed to an appropriate decompressor.
624 format type so it can be routed to an appropriate decompressor.
611 """
625 """
612 if not data:
626 if not data:
613 return data
627 return data
614
628
615 # Revlogs are read much more frequently than they are written and many
629 # Revlogs are read much more frequently than they are written and many
616 # chunks only take microseconds to decompress, so performance is
630 # chunks only take microseconds to decompress, so performance is
617 # important here.
631 # important here.
618 #
632 #
619 # We can make a few assumptions about revlogs:
633 # We can make a few assumptions about revlogs:
620 #
634 #
621 # 1) the majority of chunks will be compressed (as opposed to inline
635 # 1) the majority of chunks will be compressed (as opposed to inline
622 # raw data).
636 # raw data).
623 # 2) decompressing *any* data will likely by at least 10x slower than
637 # 2) decompressing *any* data will likely by at least 10x slower than
624 # returning raw inline data.
638 # returning raw inline data.
625 # 3) we want to prioritize common and officially supported compression
639 # 3) we want to prioritize common and officially supported compression
626 # engines
640 # engines
627 #
641 #
628 # It follows that we want to optimize for "decompress compressed data
642 # It follows that we want to optimize for "decompress compressed data
629 # when encoded with common and officially supported compression engines"
643 # when encoded with common and officially supported compression engines"
630 # case over "raw data" and "data encoded by less common or non-official
644 # case over "raw data" and "data encoded by less common or non-official
631 # compression engines." That is why we have the inline lookup first
645 # compression engines." That is why we have the inline lookup first
632 # followed by the compengines lookup.
646 # followed by the compengines lookup.
633 #
647 #
634 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
648 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
635 # compressed chunks. And this matters for changelog and manifest reads.
649 # compressed chunks. And this matters for changelog and manifest reads.
636 t = data[0:1]
650 t = data[0:1]
637
651
638 if t == b'x':
652 if t == b'x':
639 try:
653 try:
640 return _zlibdecompress(data)
654 return _zlibdecompress(data)
641 except zlib.error as e:
655 except zlib.error as e:
642 raise error.RevlogError(
656 raise error.RevlogError(
643 _(b'revlog decompress error: %s')
657 _(b'revlog decompress error: %s')
644 % stringutil.forcebytestr(e)
658 % stringutil.forcebytestr(e)
645 )
659 )
646 # '\0' is more common than 'u' so it goes first.
660 # '\0' is more common than 'u' so it goes first.
647 elif t == b'\0':
661 elif t == b'\0':
648 return data
662 return data
649 elif t == b'u':
663 elif t == b'u':
650 return util.buffer(data, 1)
664 return util.buffer(data, 1)
651
665
652 compressor = self._get_decompressor(t)
666 compressor = self._get_decompressor(t)
653
667
654 return compressor.decompress(data)
668 return compressor.decompress(data)
655
669
656 @contextlib.contextmanager
670 @contextlib.contextmanager
657 def reading(self):
671 def reading(self):
658 """Context manager that keeps data and sidedata files open for reading"""
672 """Context manager that keeps data and sidedata files open for reading"""
659 if len(self.index) == 0:
673 if len(self.index) == 0:
660 yield # nothing to be read
674 yield # nothing to be read
661 elif self._delay_buffer is not None and self.inline:
675 elif self._delay_buffer is not None and self.inline:
662 msg = "revlog with delayed write should not be inline"
676 msg = "revlog with delayed write should not be inline"
663 raise error.ProgrammingError(msg)
677 raise error.ProgrammingError(msg)
664 else:
678 else:
665 with self._segmentfile.reading():
679 with self._segmentfile.reading():
666 with self._segmentfile_sidedata.reading():
680 with self._segmentfile_sidedata.reading():
667 yield
681 yield
668
682
669 @property
683 @property
670 def is_writing(self):
684 def is_writing(self):
671 """True is a writing context is open"""
685 """True is a writing context is open"""
672 return self._writinghandles is not None
686 return self._writinghandles is not None
673
687
674 @property
688 @property
675 def is_open(self):
689 def is_open(self):
676 """True if any file handle is being held
690 """True if any file handle is being held
677
691
678 Used for assert and debug in the python code"""
692 Used for assert and debug in the python code"""
679 return self._segmentfile.is_open or self._segmentfile_sidedata.is_open
693 return self._segmentfile.is_open or self._segmentfile_sidedata.is_open
680
694
681 @contextlib.contextmanager
695 @contextlib.contextmanager
682 def writing(self, transaction, data_end=None, sidedata_end=None):
696 def writing(self, transaction, data_end=None, sidedata_end=None):
683 """Open the revlog files for writing
697 """Open the revlog files for writing
684
698
685 Add content to a revlog should be done within such context.
699 Add content to a revlog should be done within such context.
686 """
700 """
687 if self.is_writing:
701 if self.is_writing:
688 yield
702 yield
689 else:
703 else:
690 ifh = dfh = sdfh = None
704 ifh = dfh = sdfh = None
691 try:
705 try:
692 r = len(self.index)
706 r = len(self.index)
693 # opening the data file.
707 # opening the data file.
694 dsize = 0
708 dsize = 0
695 if r:
709 if r:
696 dsize = self.end(r - 1)
710 dsize = self.end(r - 1)
697 dfh = None
711 dfh = None
698 if not self.inline:
712 if not self.inline:
699 try:
713 try:
700 dfh = self.opener(self.data_file, mode=b"r+")
714 dfh = self.opener(self.data_file, mode=b"r+")
701 if data_end is None:
715 if data_end is None:
702 dfh.seek(0, os.SEEK_END)
716 dfh.seek(0, os.SEEK_END)
703 else:
717 else:
704 dfh.seek(data_end, os.SEEK_SET)
718 dfh.seek(data_end, os.SEEK_SET)
705 except FileNotFoundError:
719 except FileNotFoundError:
706 dfh = self.opener(self.data_file, mode=b"w+")
720 dfh = self.opener(self.data_file, mode=b"w+")
707 transaction.add(self.data_file, dsize)
721 transaction.add(self.data_file, dsize)
708 if self.sidedata_file is not None:
722 if self.sidedata_file is not None:
709 assert sidedata_end is not None
723 assert sidedata_end is not None
710 # revlog-v2 does not inline, help Pytype
724 # revlog-v2 does not inline, help Pytype
711 assert dfh is not None
725 assert dfh is not None
712 try:
726 try:
713 sdfh = self.opener(self.sidedata_file, mode=b"r+")
727 sdfh = self.opener(self.sidedata_file, mode=b"r+")
714 dfh.seek(sidedata_end, os.SEEK_SET)
728 dfh.seek(sidedata_end, os.SEEK_SET)
715 except FileNotFoundError:
729 except FileNotFoundError:
716 sdfh = self.opener(self.sidedata_file, mode=b"w+")
730 sdfh = self.opener(self.sidedata_file, mode=b"w+")
717 transaction.add(self.sidedata_file, sidedata_end)
731 transaction.add(self.sidedata_file, sidedata_end)
718
732
719 # opening the index file.
733 # opening the index file.
720 isize = r * self.index.entry_size
734 isize = r * self.index.entry_size
721 ifh = self.__index_write_fp()
735 ifh = self.__index_write_fp()
722 if self.inline:
736 if self.inline:
723 transaction.add(self.index_file, dsize + isize)
737 transaction.add(self.index_file, dsize + isize)
724 else:
738 else:
725 transaction.add(self.index_file, isize)
739 transaction.add(self.index_file, isize)
726 # exposing all file handle for writing.
740 # exposing all file handle for writing.
727 self._writinghandles = (ifh, dfh, sdfh)
741 self._writinghandles = (ifh, dfh, sdfh)
728 self._segmentfile.writing_handle = ifh if self.inline else dfh
742 self._segmentfile.writing_handle = ifh if self.inline else dfh
729 self._segmentfile_sidedata.writing_handle = sdfh
743 self._segmentfile_sidedata.writing_handle = sdfh
730 yield
744 yield
731 finally:
745 finally:
732 self._writinghandles = None
746 self._writinghandles = None
733 self._segmentfile.writing_handle = None
747 self._segmentfile.writing_handle = None
734 self._segmentfile_sidedata.writing_handle = None
748 self._segmentfile_sidedata.writing_handle = None
735 if dfh is not None:
749 if dfh is not None:
736 dfh.close()
750 dfh.close()
737 if sdfh is not None:
751 if sdfh is not None:
738 sdfh.close()
752 sdfh.close()
739 # closing the index file last to avoid exposing referent to
753 # closing the index file last to avoid exposing referent to
740 # potential unflushed data content.
754 # potential unflushed data content.
741 if ifh is not None:
755 if ifh is not None:
742 ifh.close()
756 ifh.close()
743
757
744 def __index_write_fp(self, index_end=None):
758 def __index_write_fp(self, index_end=None):
745 """internal method to open the index file for writing
759 """internal method to open the index file for writing
746
760
747 You should not use this directly and use `_writing` instead
761 You should not use this directly and use `_writing` instead
748 """
762 """
749 try:
763 try:
750 if self._delay_buffer is None:
764 if self._delay_buffer is None:
751 f = self.opener(
765 f = self.opener(
752 self.index_file,
766 self.index_file,
753 mode=b"r+",
767 mode=b"r+",
754 checkambig=self.data_config.check_ambig,
768 checkambig=self.data_config.check_ambig,
755 )
769 )
756 else:
770 else:
757 # check_ambig affect we way we open file for writing, however
771 # check_ambig affect we way we open file for writing, however
758 # here, we do not actually open a file for writting as write
772 # here, we do not actually open a file for writting as write
759 # will appened to a delay_buffer. So check_ambig is not
773 # will appened to a delay_buffer. So check_ambig is not
760 # meaningful and unneeded here.
774 # meaningful and unneeded here.
761 f = randomaccessfile.appender(
775 f = randomaccessfile.appender(
762 self.opener, self.index_file, b"r+", self._delay_buffer
776 self.opener, self.index_file, b"r+", self._delay_buffer
763 )
777 )
764 if index_end is None:
778 if index_end is None:
765 f.seek(0, os.SEEK_END)
779 f.seek(0, os.SEEK_END)
766 else:
780 else:
767 f.seek(index_end, os.SEEK_SET)
781 f.seek(index_end, os.SEEK_SET)
768 return f
782 return f
769 except FileNotFoundError:
783 except FileNotFoundError:
770 if self._delay_buffer is None:
784 if self._delay_buffer is None:
771 return self.opener(
785 return self.opener(
772 self.index_file,
786 self.index_file,
773 mode=b"w+",
787 mode=b"w+",
774 checkambig=self.data_config.check_ambig,
788 checkambig=self.data_config.check_ambig,
775 )
789 )
776 else:
790 else:
777 return randomaccessfile.appender(
791 return randomaccessfile.appender(
778 self.opener, self.index_file, b"w+", self._delay_buffer
792 self.opener, self.index_file, b"w+", self._delay_buffer
779 )
793 )
780
794
781 def __index_new_fp(self):
795 def __index_new_fp(self):
782 """internal method to create a new index file for writing
796 """internal method to create a new index file for writing
783
797
784 You should not use this unless you are upgrading from inline revlog
798 You should not use this unless you are upgrading from inline revlog
785 """
799 """
786 return self.opener(
800 return self.opener(
787 self.index_file,
801 self.index_file,
788 mode=b"w",
802 mode=b"w",
789 checkambig=self.data_config.check_ambig,
803 checkambig=self.data_config.check_ambig,
790 )
804 )
791
805
792 def split_inline(self, tr, header, new_index_file_path=None):
806 def split_inline(self, tr, header, new_index_file_path=None):
793 """split the data of an inline revlog into an index and a data file"""
807 """split the data of an inline revlog into an index and a data file"""
794 assert self._delay_buffer is None
808 assert self._delay_buffer is None
795 existing_handles = False
809 existing_handles = False
796 if self._writinghandles is not None:
810 if self._writinghandles is not None:
797 existing_handles = True
811 existing_handles = True
798 fp = self._writinghandles[0]
812 fp = self._writinghandles[0]
799 fp.flush()
813 fp.flush()
800 fp.close()
814 fp.close()
801 # We can't use the cached file handle after close(). So prevent
815 # We can't use the cached file handle after close(). So prevent
802 # its usage.
816 # its usage.
803 self._writinghandles = None
817 self._writinghandles = None
804 self._segmentfile.writing_handle = None
818 self._segmentfile.writing_handle = None
805 # No need to deal with sidedata writing handle as it is only
819 # No need to deal with sidedata writing handle as it is only
806 # relevant with revlog-v2 which is never inline, not reaching
820 # relevant with revlog-v2 which is never inline, not reaching
807 # this code
821 # this code
808
822
809 new_dfh = self.opener(self.data_file, mode=b"w+")
823 new_dfh = self.opener(self.data_file, mode=b"w+")
810 new_dfh.truncate(0) # drop any potentially existing data
824 new_dfh.truncate(0) # drop any potentially existing data
811 try:
825 try:
812 with self.reading():
826 with self.reading():
813 for r in range(len(self.index)):
827 for r in range(len(self.index)):
814 new_dfh.write(self.get_segment_for_revs(r, r)[1])
828 new_dfh.write(self.get_segment_for_revs(r, r)[1])
815 new_dfh.flush()
829 new_dfh.flush()
816
830
817 if new_index_file_path is not None:
831 if new_index_file_path is not None:
818 self.index_file = new_index_file_path
832 self.index_file = new_index_file_path
819 with self.__index_new_fp() as fp:
833 with self.__index_new_fp() as fp:
820 self.inline = False
834 self.inline = False
821 for i in range(len(self.index)):
835 for i in range(len(self.index)):
822 e = self.index.entry_binary(i)
836 e = self.index.entry_binary(i)
823 if i == 0:
837 if i == 0:
824 packed_header = self.index.pack_header(header)
838 packed_header = self.index.pack_header(header)
825 e = packed_header + e
839 e = packed_header + e
826 fp.write(e)
840 fp.write(e)
827
841
828 # If we don't use side-write, the temp file replace the real
842 # If we don't use side-write, the temp file replace the real
829 # index when we exit the context manager
843 # index when we exit the context manager
830
844
831 self._segmentfile = randomaccessfile.randomaccessfile(
845 self._segmentfile = randomaccessfile.randomaccessfile(
832 self.opener,
846 self.opener,
833 self.data_file,
847 self.data_file,
834 self.data_config.chunk_cache_size,
848 self.data_config.chunk_cache_size,
835 )
849 )
836
850
837 if existing_handles:
851 if existing_handles:
838 # switched from inline to conventional reopen the index
852 # switched from inline to conventional reopen the index
839 ifh = self.__index_write_fp()
853 ifh = self.__index_write_fp()
840 self._writinghandles = (ifh, new_dfh, None)
854 self._writinghandles = (ifh, new_dfh, None)
841 self._segmentfile.writing_handle = new_dfh
855 self._segmentfile.writing_handle = new_dfh
842 new_dfh = None
856 new_dfh = None
843 # No need to deal with sidedata writing handle as it is only
857 # No need to deal with sidedata writing handle as it is only
844 # relevant with revlog-v2 which is never inline, not reaching
858 # relevant with revlog-v2 which is never inline, not reaching
845 # this code
859 # this code
846 finally:
860 finally:
847 if new_dfh is not None:
861 if new_dfh is not None:
848 new_dfh.close()
862 new_dfh.close()
849 return self.index_file
863 return self.index_file
850
864
851 def get_segment_for_revs(self, startrev, endrev):
865 def get_segment_for_revs(self, startrev, endrev):
852 """Obtain a segment of raw data corresponding to a range of revisions.
866 """Obtain a segment of raw data corresponding to a range of revisions.
853
867
854 Accepts the start and end revisions and an optional already-open
868 Accepts the start and end revisions and an optional already-open
855 file handle to be used for reading. If the file handle is read, its
869 file handle to be used for reading. If the file handle is read, its
856 seek position will not be preserved.
870 seek position will not be preserved.
857
871
858 Requests for data may be satisfied by a cache.
872 Requests for data may be satisfied by a cache.
859
873
860 Returns a 2-tuple of (offset, data) for the requested range of
874 Returns a 2-tuple of (offset, data) for the requested range of
861 revisions. Offset is the integer offset from the beginning of the
875 revisions. Offset is the integer offset from the beginning of the
862 revlog and data is a str or buffer of the raw byte data.
876 revlog and data is a str or buffer of the raw byte data.
863
877
864 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
878 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
865 to determine where each revision's data begins and ends.
879 to determine where each revision's data begins and ends.
866
880
867 API: we should consider making this a private part of the InnerRevlog
881 API: we should consider making this a private part of the InnerRevlog
868 at some point.
882 at some point.
869 """
883 """
870 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
884 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
871 # (functions are expensive).
885 # (functions are expensive).
872 index = self.index
886 index = self.index
873 istart = index[startrev]
887 istart = index[startrev]
874 start = int(istart[0] >> 16)
888 start = int(istart[0] >> 16)
875 if startrev == endrev:
889 if startrev == endrev:
876 end = start + istart[1]
890 end = start + istart[1]
877 else:
891 else:
878 iend = index[endrev]
892 iend = index[endrev]
879 end = int(iend[0] >> 16) + iend[1]
893 end = int(iend[0] >> 16) + iend[1]
880
894
881 if self.inline:
895 if self.inline:
882 start += (startrev + 1) * self.index.entry_size
896 start += (startrev + 1) * self.index.entry_size
883 end += (endrev + 1) * self.index.entry_size
897 end += (endrev + 1) * self.index.entry_size
884 length = end - start
898 length = end - start
885
899
886 return start, self._segmentfile.read_chunk(start, length)
900 return start, self._segmentfile.read_chunk(start, length)
887
901
888 def _chunk(self, rev):
902 def _chunk(self, rev):
889 """Obtain a single decompressed chunk for a revision.
903 """Obtain a single decompressed chunk for a revision.
890
904
891 Accepts an integer revision and an optional already-open file handle
905 Accepts an integer revision and an optional already-open file handle
892 to be used for reading. If used, the seek position of the file will not
906 to be used for reading. If used, the seek position of the file will not
893 be preserved.
907 be preserved.
894
908
895 Returns a str holding uncompressed data for the requested revision.
909 Returns a str holding uncompressed data for the requested revision.
896 """
910 """
897 if self._uncompressed_chunk_cache is not None:
911 if self._uncompressed_chunk_cache is not None:
898 uncomp = self._uncompressed_chunk_cache.get(rev)
912 uncomp = self._uncompressed_chunk_cache.get(rev)
899 if uncomp is not None:
913 if uncomp is not None:
900 return uncomp
914 return uncomp
901
915
902 compression_mode = self.index[rev][10]
916 compression_mode = self.index[rev][10]
903 data = self.get_segment_for_revs(rev, rev)[1]
917 data = self.get_segment_for_revs(rev, rev)[1]
904 if compression_mode == COMP_MODE_PLAIN:
918 if compression_mode == COMP_MODE_PLAIN:
905 uncomp = data
919 uncomp = data
906 elif compression_mode == COMP_MODE_DEFAULT:
920 elif compression_mode == COMP_MODE_DEFAULT:
907 uncomp = self._decompressor(data)
921 uncomp = self._decompressor(data)
908 elif compression_mode == COMP_MODE_INLINE:
922 elif compression_mode == COMP_MODE_INLINE:
909 uncomp = self.decompress(data)
923 uncomp = self.decompress(data)
910 else:
924 else:
911 msg = b'unknown compression mode %d'
925 msg = b'unknown compression mode %d'
912 msg %= compression_mode
926 msg %= compression_mode
913 raise error.RevlogError(msg)
927 raise error.RevlogError(msg)
914 if self._uncompressed_chunk_cache is not None:
928 if self._uncompressed_chunk_cache is not None:
915 self._uncompressed_chunk_cache.insert(rev, uncomp, cost=len(uncomp))
929 self._uncompressed_chunk_cache.insert(rev, uncomp, cost=len(uncomp))
916 return uncomp
930 return uncomp
917
931
918 def _chunks(self, revs, targetsize=None):
932 def _chunks(self, revs, targetsize=None):
919 """Obtain decompressed chunks for the specified revisions.
933 """Obtain decompressed chunks for the specified revisions.
920
934
921 Accepts an iterable of numeric revisions that are assumed to be in
935 Accepts an iterable of numeric revisions that are assumed to be in
922 ascending order. Also accepts an optional already-open file handle
936 ascending order. Also accepts an optional already-open file handle
923 to be used for reading. If used, the seek position of the file will
937 to be used for reading. If used, the seek position of the file will
924 not be preserved.
938 not be preserved.
925
939
926 This function is similar to calling ``self._chunk()`` multiple times,
940 This function is similar to calling ``self._chunk()`` multiple times,
927 but is faster.
941 but is faster.
928
942
929 Returns a list with decompressed data for each requested revision.
943 Returns a list with decompressed data for each requested revision.
930 """
944 """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self.inline
        iosize = self.index.entry_size
        buffer = util.buffer

        fetched_revs = []
        fadd = fetched_revs.append

        chunks = []
        ladd = chunks.append

        if self._uncompressed_chunk_cache is None:
            fetched_revs = revs
        else:
            for rev in revs:
                cached_value = self._uncompressed_chunk_cache.get(rev)
                if cached_value is None:
                    fadd(rev)
                else:
                    ladd((rev, cached_value))

        if not fetched_revs:
            slicedchunks = ()
        elif not self.data_config.with_sparse_read:
            slicedchunks = (fetched_revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self,
                fetched_revs,
                targetsize=targetsize,
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self.get_segment_for_revs(firstrev, lastrev)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                for rev in revschunk:
                    ladd((rev, self._chunk(rev)))
                # skip to the next chunk run: `offset` and `data` are not
                # defined when the bulk read overflowed
                continue

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    c = c
                elif comp_mode == COMP_MODE_INLINE:
                    c = decomp(c)
                elif comp_mode == COMP_MODE_DEFAULT:
                    c = def_decomp(c)
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)
                ladd((rev, c))
                if self._uncompressed_chunk_cache is not None:
                    self._uncompressed_chunk_cache.insert(rev, c, len(c))

        chunks.sort()
        return [x[1] for x in chunks]

    def raw_text(self, node, rev):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            cachedrev = self._revisioncache[1]

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._inner._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        if self._uncompressed_chunk_cache is not None:
            # dynamically update the uncompressed_chunk_cache size to the
            # largest revision we saw in this revlog.
            factor = self.data_config.uncompressed_cache_factor
            candidate_size = rawsize * factor
            if candidate_size > self._uncompressed_chunk_cache.maxcost:
                self._uncompressed_chunk_cache.maxcost = candidate_size

        bins = self._chunks(chain, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

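    # Editor's sketch (not part of the revlog API): how a delta chain is
    # folded back into a full text, mirroring what ``raw_text`` does above.
    # Assumes `chunks` holds the decompressed base text followed by its
    # deltas, in chain order:
    #
    #     from mercurial import mdiff
    #
    #     def fold_chain(chunks):
    #         basetext = bytes(chunks[0])        # full text of the chain base
    #         return mdiff.patches(basetext, chunks[1:])  # apply each delta
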
    def sidedata(self, rev, sidedata_end):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self.inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if sidedata_end < sidedata_offset + sidedata_size:
            filename = self.sidedata_file
            end = sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

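    # Editor's sketch of the inline offset adjustment in ``sidedata`` above.
    # In an inline revlog, index entries and data share one file, so an
    # offset recorded in the index must be shifted past the (rev + 1) index
    # entries that precede the data. Hypothetical numbers for illustration:
    #
    #     entry_size = 64        # assumed index entry size
    #     rev = 2                # third revision
    #     stored_offset = 1000   # offset as recorded in the index
    #     real_offset = stored_offset + entry_size * (1 + rev)  # -> 1192
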
    def write_entry(
        self,
        transaction,
        entry,
        data,
        link,
        offset,
        sidedata,
        sidedata_offset,
        index_end,
        data_end,
        sidedata_end,
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if index_end is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(index_end, os.SEEK_SET)
        if dfh:
            if data_end is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(sidedata_end, os.SEEK_SET)

        curr = len(self.index) - 1
        if not self.inline:
            transaction.add(self.data_file, offset)
            if self.sidedata_file:
                transaction.add(self.sidedata_file, sidedata_offset)
            transaction.add(self.canonical_index_file, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            if self._delay_buffer is None:
                ifh.write(entry)
            else:
                self._delay_buffer.append(entry)
        elif self._delay_buffer is not None:
            msg = b'invalid delayed write on inline revlog'
            raise error.ProgrammingError(msg)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self.canonical_index_file, offset)
            assert not sidedata
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
        return (
            ifh.tell(),
            dfh.tell() if dfh else None,
            sdfh.tell() if sdfh else None,
        )

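    # Editor's sketch of the seek-before-write pattern used by
    # ``write_entry`` (standalone illustration with a plain builtin file
    # object and a throwaway path, not revlog code). Explicitly positioning
    # the handle avoids relying on platform-specific a+ append behavior:
    #
    #     import os
    #
    #     with open('/tmp/example.bin', 'a+b') as fh:
    #         fh.read()                # the handle was used for reading...
    #         fh.seek(0, os.SEEK_END)  # ...so reposition before writing
    #         fh.write(b'new entry')
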
    def _divert_index(self):
        index_file = self.index_file
        # When we encounter a legacy inline changelog, we split it. However,
        # it is important to use the expected filename for pending content
        # (<radix>.a), otherwise hooks won't see the content of the pending
        # transaction.
        if index_file.endswith(b'.s'):
            index_file = self.index_file[:-2]
        return index_file + b'.a'

    def delay(self):
        assert not self.is_open
        if self.inline:
            msg = "revlog with delayed write should not be inline"
            raise error.ProgrammingError(msg)
        if self._delay_buffer is not None or self._orig_index_file is not None:
            # delay or divert already in place
            return None
        elif len(self.index) == 0:
            self._orig_index_file = self.index_file
            self.index_file = self._divert_index()
            assert self._orig_index_file is not None
            assert self.index_file is not None
            if self.opener.exists(self.index_file):
                self.opener.unlink(self.index_file)
            return self.index_file
        else:
            self._delay_buffer = []
            return None

    def write_pending(self):
        assert not self.is_open
        if self.inline:
            msg = "revlog with delayed write should not be inline"
            raise error.ProgrammingError(msg)
        if self._orig_index_file is not None:
            return None, True
        any_pending = False
        pending_index_file = self._divert_index()
        if self.opener.exists(pending_index_file):
            self.opener.unlink(pending_index_file)
        util.copyfile(
            self.opener.join(self.index_file),
            self.opener.join(pending_index_file),
        )
        if self._delay_buffer:
            with self.opener(pending_index_file, b'r+') as ifh:
                ifh.seek(0, os.SEEK_END)
                ifh.write(b"".join(self._delay_buffer))
            any_pending = True
        self._delay_buffer = None
        self._orig_index_file = self.index_file
        self.index_file = pending_index_file
        return self.index_file, any_pending

    def finalize_pending(self):
        assert not self.is_open
        if self.inline:
            msg = "revlog with delayed write should not be inline"
            raise error.ProgrammingError(msg)

        delay = self._delay_buffer is not None
        divert = self._orig_index_file is not None

        if delay and divert:
            assert False, "unreachable"
        elif delay:
            if self._delay_buffer:
                with self.opener(self.index_file, b'r+') as ifh:
                    ifh.seek(0, os.SEEK_END)
                    ifh.write(b"".join(self._delay_buffer))
            self._delay_buffer = None
        elif divert:
            if self.opener.exists(self.index_file):
                self.opener.rename(
                    self.index_file,
                    self._orig_index_file,
                    checkambig=True,
                )
            self.index_file = self._orig_index_file
            self._orig_index_file = None
        else:
            msg = b"neither delay nor divert found on this revlog"
            raise error.ProgrammingError(msg)
        return self.canonical_index_file


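# Editor's sketch of the delayed-write life cycle implemented by ``delay``,
# ``write_pending`` and ``finalize_pending`` above (hypothetical driver code;
# `inner` stands for an _InnerRevlog opened on a non-inline revlog):
#
#     inner.delay()             # buffer index writes, or divert to <radix>.i.a
#     ...                       # revisions are added within the transaction
#     inner.write_pending()     # expose pending entries to pre-commit hooks
#     inner.finalize_pending()  # fold the pending data back into <radix>.i
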
class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

    _flagserrorclass = error.RevlogError

    opener: vfsmod.vfs

    @staticmethod
    def is_inline_index(header_bytes):
        """Determine if a revlog is inline from the initial bytes of the index"""
        if len(header_bytes) == 0:
            return True

        header = INDEX_HEADER.unpack(header_bytes)[0]

        _format_flags = header & ~0xFFFF
        _format_version = header & 0xFFFF

        features = FEATURES_BY_VERSION[_format_version]
        return features[b'inline'](_format_flags)

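    # Editor's sketch of the header split used by ``is_inline_index``: the
    # low 16 bits of the first index word carry the format version and the
    # high bits carry feature flags (illustrative values only):
    #
    #     header = 0x00010001        # FLAG_INLINE_DATA | REVLOGV1
    #     version = header & 0xFFFF  # -> 1 (REVLOGV1)
    #     flags = header & ~0xFFFF   # -> 0x00010000 (inline data)
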
    _docket_file: Optional[bytes]

    def __init__(
        self,
        opener: vfsmod.vfs,
        target,
        radix,
        postfix=None,  # only exist for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        try_split=False,
        canonical_parent_order=True,
        data_config=None,
        delta_config=None,
        feature_config=None,
        may_inline=True,  # may inline new revlog
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must reliably be set by normal
        code, but test, debug, or performance measurement code might not set
        this to an accurate value.
        """

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self._try_split = try_split
        self._may_inline = may_inline
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        if feature_config is not None:
            self.feature_config = feature_config.copy()
        elif b'feature-config' in self.opener.options:
            self.feature_config = self.opener.options[b'feature-config'].copy()
        else:
            self.feature_config = FeatureConfig()
        self.feature_config.censorable = censorable
        self.feature_config.canonical_parent_order = canonical_parent_order
        if data_config is not None:
            self.data_config = data_config.copy()
        elif b'data-config' in self.opener.options:
            self.data_config = self.opener.options[b'data-config'].copy()
        else:
            self.data_config = DataConfig()
        self.data_config.check_ambig = checkambig
        self.data_config.mmap_large_index = mmaplargeindex
        if delta_config is not None:
            self.delta_config = delta_config.copy()
        elif b'delta-config' in self.opener.options:
            self.delta_config = self.opener.options[b'delta-config'].copy()
        else:
            self.delta_config = DeltaConfig()
        self.delta_config.upper_bound_comp = upperboundcomp

        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)

        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}

        # other optional features

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)
        # prevent nesting of addgroup
        self._adding_group = None

        chunk_cache = self._loadindex()
        self._load_inner(chunk_cache)
        self._concurrencychecker = concurrencychecker

    def _init_opts(self):
        """process options (from above/config) to set up the associated default revlog mode

        These values might be affected when actually reading on disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
            compute_rank = opts.get(b'changelogv2.compute-rank', True)
            self.feature_config.compute_rank = compute_rank
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1
            if self._may_inline:
                new_header |= FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        mmapindexthreshold = None
        if self.data_config.mmap_large_index:
            mmapindexthreshold = self.data_config.mmap_index_threshold
        if self.feature_config.enable_ellipsis:
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        chunk_cache_size = self.data_config.chunk_cache_size
        if chunk_cache_size <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % chunk_cache_size
            )
        elif chunk_cache_size & (chunk_cache_size - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % chunk_cache_size
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

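    # Editor's note: the ``n & (n - 1)`` test above is the classic
    # power-of-two check; a power of two has exactly one bit set, so
    # clearing its lowest set bit yields zero. A standalone illustration:
    #
    #     def is_power_of_two(n):
    #         return n > 0 and n & (n - 1) == 0
    #
    #     assert is_power_of_two(65536)
    #     assert not is_power_of_two(65535)
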
    def _get_data(self, filepath, mmap_threshold, size=None):
        """return the file content, with or without mmap

        If the file is missing, return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if (
                        file_size >= mmap_threshold
                        and self.opener.is_mmap_safe(filepath)
                    ):
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''

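    # Editor's sketch of the read-vs-mmap decision made by ``_get_data``
    # (hypothetical standalone version using only the stdlib; the real
    # method goes through ``self.opener`` and ``util.mmapread``):
    #
    #     import mmap
    #     import os
    #
    #     def read_maybe_mmap(path, threshold):
    #         with open(path, 'rb') as fp:
    #             size = os.fstat(fp.fileno()).st_size
    #             if threshold is not None and size >= threshold:
    #                 return mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ)
    #             return fp.read()
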
    def get_streams(self, max_linkrev, force_inline=False):
        """return a list of streams that represent this revlog

        This is used by stream-clone to do bytes to bytes copies of a repository.

        This streams data for all revisions that refer to a changelog revision up
        to `max_linkrev`.

        If `force_inline` is set, it enforces that the stream will represent an inline revlog.

        It returns a list of three-tuples:

            [
                (filename, bytes_stream, stream_size),
                …
            ]
        """
        n = len(self)
        index = self.index
        while n > 0:
            linkrev = index[n - 1][4]
            if linkrev < max_linkrev:
                break
            # note: this loop will rarely go through multiple iterations, since
            # it only traverses commits created during the current streaming
            # pull operation.
            #
            # If this becomes a problem, using a binary search should cap the
            # runtime of this.
            n = n - 1
        if n == 0:
            # no data to send
            return []
        index_size = n * index.entry_size
        data_size = self.end(n - 1)

        # XXX we might have been split (or stripped) since the object
        # initialization. We need to close this race too, probably by having a
        # way to pre-open the files we feed to the revlog and never closing
        # them before we are done streaming.

        if self._inline:

            def get_stream():
                with self.opener(self._indexfile, mode=b"r") as fp:
                    yield None
                    size = index_size + data_size
                    if size <= 65536:
                        yield fp.read(size)
                    else:
                        yield from util.filechunkiter(fp, limit=size)

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        elif force_inline:

            def get_stream():
                with self.reading():
                    yield None

                    for rev in range(n):
                        idx = self.index.entry_binary(rev)
                        if rev == 0 and self._docket is None:
                            # re-inject the inline flag
                            header = self._format_flags
                            header |= self._format_version
                            header |= FLAG_INLINE_DATA
                            header = self.index.pack_header(header)
                            idx = header + idx
                        yield idx
                        yield self._inner.get_segment_for_revs(rev, rev)[1]

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        else:

            def get_index_stream():
                with self.opener(self._indexfile, mode=b"r") as fp:
                    yield None
                    if index_size <= 65536:
                        yield fp.read(index_size)
                    else:
                        yield from util.filechunkiter(fp, limit=index_size)

            def get_data_stream():
                with self._datafp() as fp:
                    yield None
                    if data_size <= 65536:
                        yield fp.read(data_size)
                    else:
                        yield from util.filechunkiter(fp, limit=data_size)

            index_stream = get_index_stream()
            next(index_stream)
            data_stream = get_data_stream()
            next(data_stream)
            return [
                (self._datafile, data_stream, data_size),
                (self._indexfile, index_stream, index_size),
            ]

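    # Editor's sketch of how a stream-clone consumer might drain the value
    # returned by ``get_streams`` (hypothetical `revlog` and `out` objects;
    # each stream yields byte chunks totalling `size` bytes):
    #
    #     for name, stream, size in revlog.get_streams(max_linkrev):
    #         written = 0
    #         for chunk in stream:
    #             out.write(chunk)
    #             written += len(chunk)
    #         assert written == size
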
    def _loadindex(self, docket=None):
        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        elif self._try_split and self.opener.exists(self._split_index_file):
            entry_point = self._split_index_file
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self.delta_config.general_delta = features[b'generaldelta'](
                self._format_flags
            )
            self.feature_config.has_side_data = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self.delta_config.general_delta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self.delta_config.general_delta:
            self.delta_config.sparse_revlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None and self._nodemap_file is not None:
            # we would like to use the rust_index in all cases, especially
            # because it is necessary for AncestorsIterator and LazyAncestors
            # since the 6.7 cycle.
            #
            # However, the performance impact of unconditionally building the
            # nodemap is currently a problem for non-persistent nodemap
            # repositories.
            use_rust_index = True

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = functools.partial(
                parse_index_v1_rust, default_header=new_header
            )
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and hasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)

        return chunkcache

    def _load_inner(self, chunk_cache):
        if self._docket is None:
            default_compression_header = None
        else:
            default_compression_header = self._docket.default_compression_header

        self._inner = _InnerRevlog(
            opener=self.opener,
            index=self.index,
            index_file=self._indexfile,
            data_file=self._datafile,
            sidedata_file=self._sidedatafile,
            inline=self._inline,
            data_config=self.data_config,
            delta_config=self.delta_config,
            feature_config=self.feature_config,
            chunk_cache=chunk_cache,
            default_compression_header=default_compression_header,
        )

    def get_revlog(self):
        """simple function to mirror the API of other not-really-revlog classes"""
        return self

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        if self.revlog_kind == KIND_FILELOG:
            # Reference the file without the "data/" prefix, so it is familiar
            # to the user.
            return self.target[1]
        else:
            return self.radix

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

1822 def hasnode(self, node):
1836 def hasnode(self, node):
1823 try:
1837 try:
1824 self.rev(node)
1838 self.rev(node)
1825 return True
1839 return True
1826 except KeyError:
1840 except KeyError:
1827 return False
1841 return False
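
    # Illustrative sketch (not part of the original module): the node/rev
    # accessors above round-trip. Assuming `rl` is an already-opened revlog
    # instance and `some_node` a binary nodeid it stores:
    #
    #     r = rl.rev(some_node)           # nodeid -> revision number
    #     assert rl.node(r) == some_node  # revision number -> nodeid
    #     assert rl.hasnode(some_node)
    #     assert rl.tip() == rl.node(rl.tiprev())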

    def _candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        """update the on-disk cache

        If a transaction is passed, the update may be delayed until
        transaction commit."""
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        """Clear in-memory caches"""
        self._chainbasecache.clear()
        self._inner.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The Python code is the one responsible for validating the docket,
        # so we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and hasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        """return the revision number associated with a <nodeid>"""
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset (instead of previous-offset +
        # previous-size).
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
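
    # Worked example (illustrative, not from the original source): the first
    # index entry packs the data offset and the flags into one integer as
    # `(offset << 16) | flags`. For a revision stored at byte offset 1024
    # with the censored flag set:
    #
    #     entry0 = (1024 << 16) | REVIDX_ISCENSORED
    #     assert entry0 >> 16 == 1024                   # what start() returns
    #     assert entry0 & 0xFFFF == REVIDX_ISCENSORED   # what flags() returns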

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.feature_config.has_side_data:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base
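
    # Sketch of the walk above (illustrative): index entry [3] stores the
    # delta base of each revision, and a revision stored as a full snapshot
    # is its own base. Given a hypothetical chain 5 -> 3 -> 2 where
    # index[2][3] == 2, chainbase(5) follows 5 -> 3 -> 2, returns 2, and
    # caches the answer for later calls.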

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        if self.feature_config.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]
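
    # Illustrative: with canonical_parent_order, a null first parent is
    # swapped into the second slot, so an entry whose stored parents are
    # (nullrev, 7) is reported as (7, nullrev), while (7, nullrev) is
    # returned unchanged.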

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        # (d[5] is a revision number, so compare against nullrev, matching
        # parentrevs() above)
        if self.feature_config.canonical_parent_order and d[5] == nullrev:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self.delta_config.general_delta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        return self._inner._deltachain(rev, stoprev=stoprev)

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]
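
    # Usage sketch (illustrative, `rl`, `common_node` and `head_node` are
    # assumed names): given a head node and a node a peer is known to have,
    # this computes what that peer is missing:
    #
    #     has, missing = rl.findcommonmissing([common_node], [head_node])
    #     # `missing` is topologically sorted, so it can be applied in order.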

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
                else:
                    p = tuple(self.parents(n))
                    # A node is a descendant if either of its parents are
                    # descendants. (We seeded the descendants set with the
                    # roots up there, remember?)
                    if (p[0] in descendants) or (p[1] in descendants):
                        descendants.add(n)
                        isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def headrevsdiff(self, start, stop):
        try:
            return self.index.headrevsdiff(start, stop)
        except AttributeError:
            return dagop.headrevsdiff(self._uncheckedparentrevs, start, stop)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
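        # Note (added for clarity): the list has one extra slot so that
        # writing to index nullrev (-1) for null parents lands in the spare
        # last slot instead of clobbering a real revision; only real revs are
        # ever set to 1, so the spare slot never reads as a head.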
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def _head_node_ids(self):
        try:
            return self.index.head_node_ids()
        except AttributeError:
            return [self.node(r) for r in self.headrevs()]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return self._head_node_ids()
        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def diffheads(self, start, stop):
        """return the nodes that make up the difference between
        heads of revs before `start` and heads of revs before `stop`"""
        removed, added = self.headrevsdiff(start, stop)
        return [self.node(r) for r in removed], [self.node(r) for r in added]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))
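
    # Illustrative: the `a > b` early exit above is sound because revision
    # numbers are topologically ordered, so an ancestor always has a lower
    # rev than its descendants. For example:
    #
    #     rl.isancestorrev(nullrev, r)  # True for any r
    #     rl.isancestorrev(5, 3)        # False, without any graph walk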

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if not (id[-1] in hexdigits):
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))
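
    # Usage sketch (illustrative): lookup() accepts several spellings of the
    # same revision; each of these resolves to one node or raises
    # LookupError / AmbiguousPrefixLookupError:
    #
    #     rl.lookup(0)             # revision number
    #     rl.lookup(b'0')          # str(revision number)
    #     rl.lookup(b'12ab34')     # unambiguous hex prefix
    #     rl.lookup(binary_node)   # full binary nodeid (assumed name)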

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self.delta_config.general_delta:
            return base
        else:
            return rev - 1
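
    # Illustrative: with general delta, index entry [3] is the delta parent
    # itself; without it, entry [3] only records the chain base and the
    # delta is implicitly against `rev - 1`. In both layouts a revision
    # whose base is itself is a full snapshot, hence
    # `deltaparent(rev) == nullrev`.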
2692
2706
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        ret = self._inner.issnapshot(rev)
        self.issnapshot = self._inner.issnapshot
        return ret

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._inner._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._inner._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

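    # Editor's note (sketch, not from this changeset): the fast path above
    # only applies when rev1 is exactly rev2's stored delta base, in which
    # case the on-disk chunk already *is* the wanted delta. Any other pair
    # falls back to reconstructing both raw texts (two full delta-chain
    # walks) and running mdiff.textdiff over them, which is the expensive
    # path.
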
    def revision(self, nodeorrev):
        """return an uncompressed revision of a given node or revision
        number.
        """
        return self._revisiondata(nodeorrev)

    def sidedata(self, nodeorrev):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _rawtext(self, node, rev):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """
        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._inner._revisioncache:
            if self._inner._revisioncache[0] == node:
                return (rev, self._inner._revisioncache[2], True)

        if rev is None:
            rev = self.rev(node)

        return self._inner.raw_text(node, rev)

    def _revisiondata(self, nodeorrev, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev)

        if raw and validated:
            # if we don't need to process the raw text and the raw text is
            # cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._inner._revisioncache = (node, rev, rawtext)

        return text

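    # Editor's sketch (simplified, not from this changeset): the read
    # pipeline above in miniature. rawtext is what sits in the revlog; flag
    # processors may transform it into the logical text, and the hash is
    # checked only if every processor agrees it still applies. The processor
    # table below is hypothetical; the real logic lives in flagutil.
    def _sketch_processflagsread(rawtext, flags, processors, checkhash):
        text, validate = rawtext, True
        for flag in sorted(processors):
            if flags & flag:
                read_fn = processors[flag]
                text, flag_validate = read_fn(text)
                validate = validate and flag_validate
        if validate:
            checkhash(text)
        return text
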
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        sidedata_end = None
        if self._docket is not None:
            sidedata_end = self._docket.sidedata_end
        return self._inner.sidedata(rev, sidedata_end)

    def rawdata(self, nodeorrev):
        """return the uncompressed raw data of a given node or revision number."""
        return self._revisiondata(nodeorrev, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if (
                    self._inner._revisioncache
                    and self._inner._revisioncache[0] == node
                ):
                    self._inner._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self.feature_config.censorable and storageutil.iscensoredtext(
                text
            ):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

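    # Editor's sketch (not from this changeset): the SHA-1 node scheme that
    # hash()/checkhash() rely on, under Mercurial's historical definition:
    # the digest of the two parent nodes in sorted order followed by the
    # text. A standalone approximation; the real code lives in storageutil.
    def _sketch_hashrevisionsha1(text, p1, p2):
        import hashlib

        a, b = sorted((p1, p2))
        s = hashlib.sha1(a)
        s.update(b)
        s.update(text)
        return s.digest()
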
    @property
    def _split_index_file(self):
        """the path where the index of an ongoing splitting operation is expected

        The file will only exist if a splitting operation is in progress, but
        it is always expected at the same location."""
        parts = self.radix.split(b'/')
        if len(parts) > 1:
            # adds a '-s' suffix to the `data/` or `meta/` base
            head = parts[0] + b'-s'
            mids = parts[1:-1]
            tail = parts[-1] + b'.i'
            pieces = [head] + mids + [tail]
            return b'/'.join(pieces)
        else:
            # the revlog is stored at the root of the store (changelog or
            # manifest), no risk of collision.
            return self.radix + b'.i.s'

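    # Editor's note (illustration, not from this changeset): concrete shapes
    # of the property above. For a filelog with radix b'data/foo.txt' the
    # split index lands at b'data-s/foo.txt.i'; for the changelog, whose
    # radix has no directory part, it is simply b'00changelog.i.s'.
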
    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or (self._may_inline and total_size < _maxinline):
            return

        if self._docket is not None:
            msg = b"inline revlog should not have a docket"
            raise error.ProgrammingError(msg)

        # In the common case, we enforce inline size because the revlog has
        # been appended to. And in such a case, it must have an initial offset
        # recorded in the transaction.
        troffset = tr.findoffset(self._inner.canonical_index_file)
        pre_touched = troffset is not None
        if not pre_touched and self.target[0] != KIND_CHANGELOG:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )

        tr.addbackup(self._inner.canonical_index_file, for_offset=pre_touched)
        tr.add(self._datafile, 0)

        new_index_file_path = None
        old_index_file_path = self._indexfile
        new_index_file_path = self._split_index_file
        opener = self.opener
        weak_self = weakref.ref(self)

        # the "split" index replaces the real index when the transaction is
        # finalized
        def finalize_callback(tr):
            opener.rename(
                new_index_file_path,
                old_index_file_path,
                checkambig=True,
            )
            maybe_self = weak_self()
            if maybe_self is not None:
                maybe_self._indexfile = old_index_file_path
                maybe_self._inner.index_file = maybe_self._indexfile

        def abort_callback(tr):
            maybe_self = weak_self()
            if maybe_self is not None:
                maybe_self._indexfile = old_index_file_path
                maybe_self._inner.inline = True
                maybe_self._inner.index_file = old_index_file_path

        tr.registertmp(new_index_file_path)
        # we use 001 here to make this happen after the finalization of the
        # pending changelog write (which uses 000). Otherwise the two
        # finalizers would step over each other and delete the changelog.i
        # file.
        if self.target[1] is not None:
            callback_id = b'001-revlog-split-%d-%s' % self.target
        else:
            callback_id = b'001-revlog-split-%d' % self.target[0]
        tr.addfinalize(callback_id, finalize_callback)
        tr.addabort(callback_id, abort_callback)

        self._format_flags &= ~FLAG_INLINE_DATA
        self._inner.split_inline(
            tr,
            self._format_flags | self._format_version,
            new_index_file_path=new_index_file_path,
        )

        self._inline = False
        if new_index_file_path is not None:
            self._indexfile = new_index_file_path

        nodemaputil.setup_persistent_nodemap(tr, self)

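    # Editor's sketch (simplified, not from this changeset): the split
    # trigger in isolation. An inline revlog keeps data interleaved with the
    # index; once the stored data reaches the inline ceiling it is rewritten
    # into separate .i/.d files. The check is cheap because start() +
    # length() of the tip rev equals the total amount of stored data. This
    # sketch ignores the `_may_inline` opt-out handled above.
    def _sketch_needs_split(start_of_tip, length_of_tip, maxinline, inline):
        return inline and (start_of_tip + length_of_tip) >= maxinline
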
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        with self._inner.reading():
            yield

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._inner.is_writing:
            yield
        else:
            data_end = None
            sidedata_end = None
            if self._docket is not None:
                data_end = self._docket.data_end
                sidedata_end = self._docket.sidedata_end
            with self._inner.writing(
                transaction,
                data_end=data_end,
                sidedata_end=sidedata_end,
            ):
                yield
                if self._docket is not None:
                    self._write_docket(transaction)

    @property
    def is_delaying(self):
        return self._inner.is_delaying

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic.

        We could also imagine using the same transaction logic for all revlogs
        since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in such a
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.feature_config.has_side_data:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

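    # Editor's sketch (hypothetical usage, not part of this changeset): a
    # caller of addrevision() roughly looks like the following; `repo`, `fl`
    # and `linkrev` are illustrative names, not API guarantees.
    #
    #   with repo.lock(), repo.transaction(b'example') as tr:
    #       node = fl.addrevision(b'new content', tr, linkrev, p1node, p2node)
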
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g. received
        over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data: bytes) -> Tuple[bytes, bytes]:
        return self._inner.compress(data)

    def decompress(self, data):
        return self._inner.decompress(data)

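    # Editor's note (sketch, not from this changeset): the classic revlog
    # chunk framing that these helpers delegate to the inner revlog. The
    # first byte of a stored chunk tags its encoding:
    #
    #   b'\0' - literal data stored as-is (payload already starts with NUL)
    #   b'u'  - uncompressed data: marker byte, then the payload
    #   b'x'  - zlib stream (0x78 is the zlib header byte)
    #
    # compress() returns a (header, data) pair so callers can write both
    # pieces without concatenating large buffers; newer formats can select
    # other engines via the docket's default compression header.
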
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._inner._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._inner._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need the rawtext size, before it was changed by flag processors,
            # which is the non-raw size. use revlog explicitly to avoid
            # filelog's extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self.delta_config.debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        if cachedelta is not None and len(cachedelta) == 2:
            # If the cached delta has no information about how it should be
            # reused, add the default reuse instruction according to the
            # revlog's configuration.
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse = DELTA_BASE_REUSE_NO
            cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.feature_config.has_side_data:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self._inner.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # from ones we manually add.
            sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self.feature_config.compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

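        # Editor's note (sketch, not from this changeset): the merge branch
        # above in set terms. The rank of a revision is the size of its
        # ancestor set, itself included, so for a merge:
        #
        #   rank(merge) = 1 + |ancestors(p1) U ancestors(p2)|
        #               = 1 + rank(pmax) + |ancestors(pmin) - ancestors(pmax)|
        #
        # which is exactly what fast_rank(pmax) plus the count of
        # findmissingrevs([pmax], [pmin]) adds up to.
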
        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._inner._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), while revlog v2 needs
        a docket file to store that information: since sidedata can be
        rewritten to the end of the data file within a transaction, you can
        have cases where, for example, rev `n` does not have sidedata while
        rev `n - 1` does, leading to `n - 1`'s sidedata being written after
        `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self,
        transaction,
        entry,
        data,
        link,
        offset,
        sidedata,
        sidedata_offset,
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        index_end = data_end = sidedata_end = None
        if self._docket is not None:
            index_end = self._docket.index_end
            data_end = self._docket.data_end
            sidedata_end = self._docket.sidedata_end

        files_end = self._inner.write_entry(
            transaction,
            entry,
            data,
            link,
            offset,
            sidedata,
            sidedata_offset,
            index_end,
            data_end,
            sidedata_end,
        )
        self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = files_end[0]
            self._docket.data_end = files_end[1]
            self._docket.sidedata_end = files_end[2]

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self.delta_config.debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

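    # Editor's note (sketch, not from this changeset): the shape of each
    # element consumed from `deltas` above, as unpacked by the loop:
    #
    #   (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    #
    # `deltabase` names the revision the binary `delta` applies against, and
    # linkmapper() turns the changelog node `linknode` into a local linkrev.
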
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self.feature_config.censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self.feature_config.censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However, it is
            # not powerful enough at the time of this comment.
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._chaininfocache = util.lrucachedict(500)
        self._inner.clear_cache()

        del self.index[rev:-1]

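    # Editor's sketch (not from this changeset): why truncation is enough.
    # Revisions are appended in order, so every rev >= getstrippoint(minlink)
    # sits at the tail of both files and can be dropped by cutting at its
    # start offset. A standalone picture of the index cut point computed
    # above:
    def _sketch_index_cutoff(rev, entry_size, data_end, inline):
        # offset at which to truncate the index file; in an inline revlog
        # the first `rev` data chunks (data_end bytes) are interleaved with
        # the first `rev` fixed-size index entries in the same file
        if inline:
            return data_end + rev * entry_size
        return rev * entry_size
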
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

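    # Hypothetical usage sketch (not part of revlog.py): probing a revlog
    # for trailing garbage, e.g. after an interrupted write. ``rl`` is
    # assumed to be an already-open revlog instance.
    #
    #   dd, di = rl.checksize()
    #   if dd or di:
    #       # dd/di count unexpected trailing bytes in the data and index
    #       # files; (0, 0) means the on-disk sizes are consistent.
    #       print('data: +%d bytes, index: +%d bytes' % (dd, di))
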
    def files(self):
        """return list of files that compose this revlog"""
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

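    # Illustrative sketch (assumed names, not part of revlog.py): because
    # files() also reports the docket and older-generation files when a
    # docket is in use, it is the natural entry point for backup-style
    # operations that must capture every file of the revlog.
    #
    #   for path in rl.files():
    #       copy_for_backup(path)   # hypothetical helper
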
    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self.delta_config.general_delta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self._candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

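    # Hedged usage sketch (not part of revlog.py): emitrevisions() yields
    # ``revlogrevisiondelta`` objects suitable for changegroup-style
    # streaming. ``rl``, ``wanted_nodes``, and ``consume`` are assumed
    # names for illustration.
    #
    #   for delta in rl.emitrevisions(
    #       wanted_nodes,
    #       nodesorder=b'storage',
    #       revisiondata=True,
    #   ):
    #       # delta.delta holds a binary patch against delta.basenode, or
    #       # delta.revision holds the full text, depending on deltamode.
    #       consume(delta.node, delta.basenode, delta.delta)
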
    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta
        encoding differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can
        have the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy
        can significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means
        if you are converting a non-generaldelta revlog to a generaldelta
        revlog, deltas will be recomputed if the delta's parent isn't a
        parent of the revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether deltas are forcibly computed against both
        parents for merges. When it is ``None``, the destination revlog's
        existing configuration is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached
        # delta, if possible.
        old_delta_config = destrevlog.delta_config
        destrevlog.delta_config = destrevlog.delta_config.copy()

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog.delta_config.lazy_delta_base = True
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = False

            delta_both_parents = (
                forcedeltabothparents or old_delta_config.delta_both_parents
            )
            destrevlog.delta_config.delta_both_parents = delta_both_parents

            with self.reading(), destrevlog._writing(tr):
                self._clone(
                    tr,
                    destrevlog,
                    addrevisioncb,
                    deltareuse,
                    forcedeltabothparents,
                    sidedata_helpers,
                )

        finally:
            destrevlog.delta_config = old_delta_config

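    # A minimal sketch of a clone call (illustrative; ``src``, ``dst``, and
    # ``tr`` are assumed to be a source revlog, an empty destination revlog,
    # and an open transaction). DELTAREUSENEVER forces every delta to be
    # recomputed, the slow-but-thorough policy described in the docstring.
    #
    #   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
    #   assert len(dst) == len(src)   # same revisions, possibly new deltas
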
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self.delta_config.debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog.delta_config.lazy_delta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._inner._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    try:
                        rawtext = self._revisiondata(rev)
                    except error.CensoredNodeError as censored:
                        assert flags & REVIDX_ISCENSORED
                        rawtext = censored.tombstone
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

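    # Worked example (illustrative) of the flag merge used above and in
    # rewrite_sidedata: the helpers return a (flags_to_add, flags_to_remove)
    # pair, and ``flags | new_flags[0] & ~new_flags[1]`` masks the "remove"
    # bits out of the "add" set before ORing it in (``&`` binds tighter
    # than ``|``); bits already set on the entry are left untouched.
    #
    #   flags = 0b0101
    #   to_add, to_remove = 0b0011, 0b0010
    #   merged = flags | to_add & ~to_remove
    #   assert merged == 0b0101   # only bit 0 was actually added
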
    def censorrevision(self, tr, censor_nodes, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censor_nodes, tombstone)
        else:
            rewrite.v2_censor(self, tr, censor_nodes, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------
            #  flags()             | 0      | 0      | 0     | not 0
            #  renamed()           | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n'| False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text" mentioned below
            # is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

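    # Consumption sketch (illustrative, not part of revlog.py): the yielded
    # ``revlogproblem`` instances carry ``error``, ``warning``, and ``node``
    # attributes, filled in as applicable. ``rl`` and ``report`` are
    # assumed names.
    #
    #   state = {b'expectedversion': 1, b'erroroncensored': True}
    #   for problem in rl.verifyintegrity(state):
    #       report(problem.error or problem.warning, problem.node)
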
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

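    # Usage sketch (illustrative): each piece of information is opt-in, so
    # callers only pay for what they request. ``trackedsize`` sums raw
    # revision sizes, while ``storedsize`` measures the files on disk, so
    # their ratio gives a rough compression estimate. ``rl`` is an assumed
    # open revlog instance.
    #
    #   info = rl.storageinfo(trackedsize=True, storedsize=True)
    #   ratio = info[b'storedsize'] / max(1, info[b'trackedsize'])
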
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.feature_config.has_side_data:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._inner._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.feature_config.has_side_data:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self._inner.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
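
    # Condensed restatement (illustrative, assumed helper) of the
    # compression-mode choice above: compressed sidedata is kept only when
    # the compressor did not fall back to the literal marker, the first
    # byte cannot be confused with "no compression", and compression
    # actually saved space.
    #
    #   def pick_sidedata_mode(h, comp, raw, default_header):
    #       if h == b'u' or comp[:1] == b'\0' or len(comp) >= len(raw):
    #           return COMP_MODE_PLAIN, raw
    #       if comp[:1] == default_header:
    #           return COMP_MODE_DEFAULT, comp
    #       return COMP_MODE_INLINE, comp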