##// END OF EJS Templates
revlog: use the method to check if the revlog is being written to...
Raphaël Gomès -
r52751:2fd44b3d default
parent child Browse files
Show More
@@ -1,4126 +1,4126
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 # coding: utf8
2 # coding: utf8
3 #
3 #
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 """Storage back-end for Mercurial.
9 """Storage back-end for Mercurial.
10
10
11 This provides efficient delta storage with O(1) retrieve and append
11 This provides efficient delta storage with O(1) retrieve and append
12 and O(changes) merge between branches.
12 and O(changes) merge between branches.
13 """
13 """
14
14
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import contextlib
18 import contextlib
19 import functools
19 import functools
20 import io
20 import io
21 import os
21 import os
22 import struct
22 import struct
23 import typing
23 import typing
24 import weakref
24 import weakref
25 import zlib
25 import zlib
26
26
27 from typing import (
27 from typing import (
28 Iterable,
28 Iterable,
29 Iterator,
29 Iterator,
30 Optional,
30 Optional,
31 Tuple,
31 Tuple,
32 )
32 )
33
33
34 # import stuff from node for others to import from revlog
34 # import stuff from node for others to import from revlog
35 from .node import (
35 from .node import (
36 bin,
36 bin,
37 hex,
37 hex,
38 nullrev,
38 nullrev,
39 sha1nodeconstants,
39 sha1nodeconstants,
40 short,
40 short,
41 wdirrev,
41 wdirrev,
42 )
42 )
43 from .i18n import _
43 from .i18n import _
44 from .revlogutils.constants import (
44 from .revlogutils.constants import (
45 ALL_KINDS,
45 ALL_KINDS,
46 CHANGELOGV2,
46 CHANGELOGV2,
47 COMP_MODE_DEFAULT,
47 COMP_MODE_DEFAULT,
48 COMP_MODE_INLINE,
48 COMP_MODE_INLINE,
49 COMP_MODE_PLAIN,
49 COMP_MODE_PLAIN,
50 DELTA_BASE_REUSE_NO,
50 DELTA_BASE_REUSE_NO,
51 DELTA_BASE_REUSE_TRY,
51 DELTA_BASE_REUSE_TRY,
52 ENTRY_RANK,
52 ENTRY_RANK,
53 FEATURES_BY_VERSION,
53 FEATURES_BY_VERSION,
54 FLAG_GENERALDELTA,
54 FLAG_GENERALDELTA,
55 FLAG_INLINE_DATA,
55 FLAG_INLINE_DATA,
56 INDEX_HEADER,
56 INDEX_HEADER,
57 KIND_CHANGELOG,
57 KIND_CHANGELOG,
58 KIND_FILELOG,
58 KIND_FILELOG,
59 RANK_UNKNOWN,
59 RANK_UNKNOWN,
60 REVLOGV0,
60 REVLOGV0,
61 REVLOGV1,
61 REVLOGV1,
62 REVLOGV1_FLAGS,
62 REVLOGV1_FLAGS,
63 REVLOGV2,
63 REVLOGV2,
64 REVLOGV2_FLAGS,
64 REVLOGV2_FLAGS,
65 REVLOG_DEFAULT_FLAGS,
65 REVLOG_DEFAULT_FLAGS,
66 REVLOG_DEFAULT_FORMAT,
66 REVLOG_DEFAULT_FORMAT,
67 REVLOG_DEFAULT_VERSION,
67 REVLOG_DEFAULT_VERSION,
68 SUPPORTED_FLAGS,
68 SUPPORTED_FLAGS,
69 )
69 )
70 from .revlogutils.flagutil import (
70 from .revlogutils.flagutil import (
71 REVIDX_DEFAULT_FLAGS,
71 REVIDX_DEFAULT_FLAGS,
72 REVIDX_ELLIPSIS,
72 REVIDX_ELLIPSIS,
73 REVIDX_EXTSTORED,
73 REVIDX_EXTSTORED,
74 REVIDX_FLAGS_ORDER,
74 REVIDX_FLAGS_ORDER,
75 REVIDX_HASCOPIESINFO,
75 REVIDX_HASCOPIESINFO,
76 REVIDX_ISCENSORED,
76 REVIDX_ISCENSORED,
77 REVIDX_RAWTEXT_CHANGING_FLAGS,
77 REVIDX_RAWTEXT_CHANGING_FLAGS,
78 )
78 )
79 from .thirdparty import attr
79 from .thirdparty import attr
80
80
81 # Force pytype to use the non-vendored package
81 # Force pytype to use the non-vendored package
82 if typing.TYPE_CHECKING:
82 if typing.TYPE_CHECKING:
83 # noinspection PyPackageRequirements
83 # noinspection PyPackageRequirements
84 import attr
84 import attr
85
85
86 from . import (
86 from . import (
87 ancestor,
87 ancestor,
88 dagop,
88 dagop,
89 error,
89 error,
90 mdiff,
90 mdiff,
91 policy,
91 policy,
92 pycompat,
92 pycompat,
93 revlogutils,
93 revlogutils,
94 templatefilters,
94 templatefilters,
95 util,
95 util,
96 vfs as vfsmod,
96 vfs as vfsmod,
97 )
97 )
98 from .interfaces import (
98 from .interfaces import (
99 repository,
99 repository,
100 util as interfaceutil,
100 util as interfaceutil,
101 )
101 )
102 from .revlogutils import (
102 from .revlogutils import (
103 deltas as deltautil,
103 deltas as deltautil,
104 docket as docketutil,
104 docket as docketutil,
105 flagutil,
105 flagutil,
106 nodemap as nodemaputil,
106 nodemap as nodemaputil,
107 randomaccessfile,
107 randomaccessfile,
108 revlogv0,
108 revlogv0,
109 rewrite,
109 rewrite,
110 sidedata as sidedatautil,
110 sidedata as sidedatautil,
111 )
111 )
112 from .utils import (
112 from .utils import (
113 storageutil,
113 storageutil,
114 stringutil,
114 stringutil,
115 )
115 )
116
116
117 # blanked usage of all the name to prevent pyflakes constraints
117 # blanked usage of all the name to prevent pyflakes constraints
118 # We need these name available in the module for extensions.
118 # We need these name available in the module for extensions.
119
119
120 REVLOGV0
120 REVLOGV0
121 REVLOGV1
121 REVLOGV1
122 REVLOGV2
122 REVLOGV2
123 CHANGELOGV2
123 CHANGELOGV2
124 FLAG_INLINE_DATA
124 FLAG_INLINE_DATA
125 FLAG_GENERALDELTA
125 FLAG_GENERALDELTA
126 REVLOG_DEFAULT_FLAGS
126 REVLOG_DEFAULT_FLAGS
127 REVLOG_DEFAULT_FORMAT
127 REVLOG_DEFAULT_FORMAT
128 REVLOG_DEFAULT_VERSION
128 REVLOG_DEFAULT_VERSION
129 REVLOGV1_FLAGS
129 REVLOGV1_FLAGS
130 REVLOGV2_FLAGS
130 REVLOGV2_FLAGS
131 REVIDX_ISCENSORED
131 REVIDX_ISCENSORED
132 REVIDX_ELLIPSIS
132 REVIDX_ELLIPSIS
133 REVIDX_HASCOPIESINFO
133 REVIDX_HASCOPIESINFO
134 REVIDX_EXTSTORED
134 REVIDX_EXTSTORED
135 REVIDX_DEFAULT_FLAGS
135 REVIDX_DEFAULT_FLAGS
136 REVIDX_FLAGS_ORDER
136 REVIDX_FLAGS_ORDER
137 REVIDX_RAWTEXT_CHANGING_FLAGS
137 REVIDX_RAWTEXT_CHANGING_FLAGS
138
138
139 parsers = policy.importmod('parsers')
139 parsers = policy.importmod('parsers')
140 rustancestor = policy.importrust('ancestor')
140 rustancestor = policy.importrust('ancestor')
141 rustdagop = policy.importrust('dagop')
141 rustdagop = policy.importrust('dagop')
142 rustrevlog = policy.importrust('revlog')
142 rustrevlog = policy.importrust('revlog')
143
143
144 # Aliased for performance.
144 # Aliased for performance.
145 _zlibdecompress = zlib.decompress
145 _zlibdecompress = zlib.decompress
146
146
147 # max size of inline data embedded into a revlog
147 # max size of inline data embedded into a revlog
148 _maxinline = 131072
148 _maxinline = 131072
149
149
150
150
151 # Flag processors for REVIDX_ELLIPSIS.
151 # Flag processors for REVIDX_ELLIPSIS.
152 def ellipsisreadprocessor(rl, text):
152 def ellipsisreadprocessor(rl, text):
153 return text, False
153 return text, False
154
154
155
155
156 def ellipsiswriteprocessor(rl, text):
156 def ellipsiswriteprocessor(rl, text):
157 return text, False
157 return text, False
158
158
159
159
160 def ellipsisrawprocessor(rl, text):
160 def ellipsisrawprocessor(rl, text):
161 return False
161 return False
162
162
163
163
164 ellipsisprocessor = (
164 ellipsisprocessor = (
165 ellipsisreadprocessor,
165 ellipsisreadprocessor,
166 ellipsiswriteprocessor,
166 ellipsiswriteprocessor,
167 ellipsisrawprocessor,
167 ellipsisrawprocessor,
168 )
168 )
169
169
170
170
171 def _verify_revision(rl, skipflags, state, node):
171 def _verify_revision(rl, skipflags, state, node):
172 """Verify the integrity of the given revlog ``node`` while providing a hook
172 """Verify the integrity of the given revlog ``node`` while providing a hook
173 point for extensions to influence the operation."""
173 point for extensions to influence the operation."""
174 if skipflags:
174 if skipflags:
175 state[b'skipread'].add(node)
175 state[b'skipread'].add(node)
176 else:
176 else:
177 # Side-effect: read content and verify hash.
177 # Side-effect: read content and verify hash.
178 rl.revision(node)
178 rl.revision(node)
179
179
180
180
181 # True if a fast implementation for persistent-nodemap is available
181 # True if a fast implementation for persistent-nodemap is available
182 #
182 #
183 # We also consider we have a "fast" implementation in "pure" python because
183 # We also consider we have a "fast" implementation in "pure" python because
184 # people using pure don't really have performance consideration (and a
184 # people using pure don't really have performance consideration (and a
185 # wheelbarrow of other slowness source)
185 # wheelbarrow of other slowness source)
186 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
186 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
187 parsers, 'BaseIndexObject'
187 parsers, 'BaseIndexObject'
188 )
188 )
189
189
190
190
191 @attr.s(slots=True)
191 @attr.s(slots=True)
192 class RevLogRevisionDelta:
192 class RevLogRevisionDelta:
193 node = attr.ib()
193 node = attr.ib()
194 p1node = attr.ib()
194 p1node = attr.ib()
195 p2node = attr.ib()
195 p2node = attr.ib()
196 basenode = attr.ib()
196 basenode = attr.ib()
197 flags = attr.ib()
197 flags = attr.ib()
198 baserevisionsize = attr.ib()
198 baserevisionsize = attr.ib()
199 revision = attr.ib()
199 revision = attr.ib()
200 delta = attr.ib()
200 delta = attr.ib()
201 sidedata = attr.ib()
201 sidedata = attr.ib()
202 protocol_flags = attr.ib()
202 protocol_flags = attr.ib()
203 linknode = attr.ib(default=None)
203 linknode = attr.ib(default=None)
204
204
205
205
206 revlogrevisiondelta = interfaceutil.implementer(repository.irevisiondelta)(
206 revlogrevisiondelta = interfaceutil.implementer(repository.irevisiondelta)(
207 RevLogRevisionDelta
207 RevLogRevisionDelta
208 )
208 )
209
209
210 if typing.TYPE_CHECKING:
210 if typing.TYPE_CHECKING:
211 revlogrevisiondelta = RevLogRevisionDelta
211 revlogrevisiondelta = RevLogRevisionDelta
212
212
213
213
214 @attr.s(frozen=True)
214 @attr.s(frozen=True)
215 class RevLogProblem:
215 class RevLogProblem:
216 warning = attr.ib(default=None, type=Optional[bytes])
216 warning = attr.ib(default=None, type=Optional[bytes])
217 error = attr.ib(default=None, type=Optional[bytes])
217 error = attr.ib(default=None, type=Optional[bytes])
218 node = attr.ib(default=None, type=Optional[bytes])
218 node = attr.ib(default=None, type=Optional[bytes])
219
219
220
220
221 revlogproblem = interfaceutil.implementer(repository.iverifyproblem)(
221 revlogproblem = interfaceutil.implementer(repository.iverifyproblem)(
222 RevLogProblem
222 RevLogProblem
223 )
223 )
224
224
225 if typing.TYPE_CHECKING:
225 if typing.TYPE_CHECKING:
226 revlogproblem = RevLogProblem
226 revlogproblem = RevLogProblem
227
227
228
228
229 def parse_index_v1(data, inline):
229 def parse_index_v1(data, inline):
230 # call the C implementation to parse the index data
230 # call the C implementation to parse the index data
231 index, cache = parsers.parse_index2(data, inline)
231 index, cache = parsers.parse_index2(data, inline)
232 return index, cache
232 return index, cache
233
233
234
234
235 def parse_index_v2(data, inline):
235 def parse_index_v2(data, inline):
236 # call the C implementation to parse the index data
236 # call the C implementation to parse the index data
237 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
237 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
238 return index, cache
238 return index, cache
239
239
240
240
241 def parse_index_cl_v2(data, inline):
241 def parse_index_cl_v2(data, inline):
242 # call the C implementation to parse the index data
242 # call the C implementation to parse the index data
243 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
243 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
244 return index, cache
244 return index, cache
245
245
246
246
247 if hasattr(parsers, 'parse_index_devel_nodemap'):
247 if hasattr(parsers, 'parse_index_devel_nodemap'):
248
248
249 def parse_index_v1_nodemap(data, inline):
249 def parse_index_v1_nodemap(data, inline):
250 index, cache = parsers.parse_index_devel_nodemap(data, inline)
250 index, cache = parsers.parse_index_devel_nodemap(data, inline)
251 return index, cache
251 return index, cache
252
252
253 else:
253 else:
254 parse_index_v1_nodemap = None
254 parse_index_v1_nodemap = None
255
255
256
256
257 def parse_index_v1_rust(data, inline, default_header):
257 def parse_index_v1_rust(data, inline, default_header):
258 cache = (0, data) if inline else None
258 cache = (0, data) if inline else None
259 return rustrevlog.Index(data, default_header), cache
259 return rustrevlog.Index(data, default_header), cache
260
260
261
261
262 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
262 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
263 # signed integer)
263 # signed integer)
264 _maxentrysize = 0x7FFFFFFF
264 _maxentrysize = 0x7FFFFFFF
265
265
266 FILE_TOO_SHORT_MSG = _(
266 FILE_TOO_SHORT_MSG = _(
267 b'cannot read from revlog %s;'
267 b'cannot read from revlog %s;'
268 b' expected %d bytes from offset %d, data size is %d'
268 b' expected %d bytes from offset %d, data size is %d'
269 )
269 )
270
270
271 hexdigits = b'0123456789abcdefABCDEF'
271 hexdigits = b'0123456789abcdefABCDEF'
272
272
273
273
274 class _Config:
274 class _Config:
275 def copy(self):
275 def copy(self):
276 return self.__class__(**self.__dict__)
276 return self.__class__(**self.__dict__)
277
277
278
278
279 @attr.s()
279 @attr.s()
280 class FeatureConfig(_Config):
280 class FeatureConfig(_Config):
281 """Hold configuration values about the available revlog features"""
281 """Hold configuration values about the available revlog features"""
282
282
283 # the default compression engine
283 # the default compression engine
284 compression_engine = attr.ib(default=b'zlib')
284 compression_engine = attr.ib(default=b'zlib')
285 # compression engines options
285 # compression engines options
286 compression_engine_options = attr.ib(default=attr.Factory(dict))
286 compression_engine_options = attr.ib(default=attr.Factory(dict))
287
287
288 # can we use censor on this revlog
288 # can we use censor on this revlog
289 censorable = attr.ib(default=False)
289 censorable = attr.ib(default=False)
290 # does this revlog use the "side data" feature
290 # does this revlog use the "side data" feature
291 has_side_data = attr.ib(default=False)
291 has_side_data = attr.ib(default=False)
292 # might remove rank configuration once the computation has no impact
292 # might remove rank configuration once the computation has no impact
293 compute_rank = attr.ib(default=False)
293 compute_rank = attr.ib(default=False)
294 # parent order is supposed to be semantically irrelevant, so we
294 # parent order is supposed to be semantically irrelevant, so we
295 # normally resort parents to ensure that the first parent is non-null,
295 # normally resort parents to ensure that the first parent is non-null,
296 # if there is a non-null parent at all.
296 # if there is a non-null parent at all.
297 # filelog abuses the parent order as flag to mark some instances of
297 # filelog abuses the parent order as flag to mark some instances of
298 # meta-encoded files, so allow it to disable this behavior.
298 # meta-encoded files, so allow it to disable this behavior.
299 canonical_parent_order = attr.ib(default=False)
299 canonical_parent_order = attr.ib(default=False)
300 # can ellipsis commit be used
300 # can ellipsis commit be used
301 enable_ellipsis = attr.ib(default=False)
301 enable_ellipsis = attr.ib(default=False)
302
302
303 def copy(self):
303 def copy(self):
304 new = super().copy()
304 new = super().copy()
305 new.compression_engine_options = self.compression_engine_options.copy()
305 new.compression_engine_options = self.compression_engine_options.copy()
306 return new
306 return new
307
307
308
308
309 @attr.s()
309 @attr.s()
310 class DataConfig(_Config):
310 class DataConfig(_Config):
311 """Hold configuration value about how the revlog data are read"""
311 """Hold configuration value about how the revlog data are read"""
312
312
313 # should we try to open the "pending" version of the revlog
313 # should we try to open the "pending" version of the revlog
314 try_pending = attr.ib(default=False)
314 try_pending = attr.ib(default=False)
315 # should we try to open the "splitted" version of the revlog
315 # should we try to open the "splitted" version of the revlog
316 try_split = attr.ib(default=False)
316 try_split = attr.ib(default=False)
317 # When True, indexfile should be opened with checkambig=True at writing,
317 # When True, indexfile should be opened with checkambig=True at writing,
318 # to avoid file stat ambiguity.
318 # to avoid file stat ambiguity.
319 check_ambig = attr.ib(default=False)
319 check_ambig = attr.ib(default=False)
320
320
321 # If true, use mmap instead of reading to deal with large index
321 # If true, use mmap instead of reading to deal with large index
322 mmap_large_index = attr.ib(default=False)
322 mmap_large_index = attr.ib(default=False)
323 # how much data is large
323 # how much data is large
324 mmap_index_threshold = attr.ib(default=None)
324 mmap_index_threshold = attr.ib(default=None)
325 # How much data to read and cache into the raw revlog data cache.
325 # How much data to read and cache into the raw revlog data cache.
326 chunk_cache_size = attr.ib(default=65536)
326 chunk_cache_size = attr.ib(default=65536)
327
327
328 # The size of the uncompressed cache compared to the largest revision seen.
328 # The size of the uncompressed cache compared to the largest revision seen.
329 uncompressed_cache_factor = attr.ib(default=None)
329 uncompressed_cache_factor = attr.ib(default=None)
330
330
331 # The number of chunk cached
331 # The number of chunk cached
332 uncompressed_cache_count = attr.ib(default=None)
332 uncompressed_cache_count = attr.ib(default=None)
333
333
334 # Allow sparse reading of the revlog data
334 # Allow sparse reading of the revlog data
335 with_sparse_read = attr.ib(default=False)
335 with_sparse_read = attr.ib(default=False)
336 # minimal density of a sparse read chunk
336 # minimal density of a sparse read chunk
337 sr_density_threshold = attr.ib(default=0.50)
337 sr_density_threshold = attr.ib(default=0.50)
338 # minimal size of data we skip when performing sparse read
338 # minimal size of data we skip when performing sparse read
339 sr_min_gap_size = attr.ib(default=262144)
339 sr_min_gap_size = attr.ib(default=262144)
340
340
341 # are delta encoded against arbitrary bases.
341 # are delta encoded against arbitrary bases.
342 generaldelta = attr.ib(default=False)
342 generaldelta = attr.ib(default=False)
343
343
344
344
345 @attr.s()
345 @attr.s()
346 class DeltaConfig(_Config):
346 class DeltaConfig(_Config):
347 """Hold configuration value about how new delta are computed
347 """Hold configuration value about how new delta are computed
348
348
349 Some attributes are duplicated from DataConfig to help havign each object
349 Some attributes are duplicated from DataConfig to help havign each object
350 self contained.
350 self contained.
351 """
351 """
352
352
353 # can delta be encoded against arbitrary bases.
353 # can delta be encoded against arbitrary bases.
354 general_delta = attr.ib(default=False)
354 general_delta = attr.ib(default=False)
355 # Allow sparse writing of the revlog data
355 # Allow sparse writing of the revlog data
356 sparse_revlog = attr.ib(default=False)
356 sparse_revlog = attr.ib(default=False)
357 # maximum length of a delta chain
357 # maximum length of a delta chain
358 max_chain_len = attr.ib(default=None)
358 max_chain_len = attr.ib(default=None)
359 # Maximum distance between delta chain base start and end
359 # Maximum distance between delta chain base start and end
360 max_deltachain_span = attr.ib(default=-1)
360 max_deltachain_span = attr.ib(default=-1)
361 # If `upper_bound_comp` is not None, this is the expected maximal gain from
361 # If `upper_bound_comp` is not None, this is the expected maximal gain from
362 # compression for the data content.
362 # compression for the data content.
363 upper_bound_comp = attr.ib(default=None)
363 upper_bound_comp = attr.ib(default=None)
364 # Should we try a delta against both parent
364 # Should we try a delta against both parent
365 delta_both_parents = attr.ib(default=True)
365 delta_both_parents = attr.ib(default=True)
366 # Test delta base candidate group by chunk of this maximal size.
366 # Test delta base candidate group by chunk of this maximal size.
367 candidate_group_chunk_size = attr.ib(default=0)
367 candidate_group_chunk_size = attr.ib(default=0)
368 # Should we display debug information about delta computation
368 # Should we display debug information about delta computation
369 debug_delta = attr.ib(default=False)
369 debug_delta = attr.ib(default=False)
370 # trust incoming delta by default
370 # trust incoming delta by default
371 lazy_delta = attr.ib(default=True)
371 lazy_delta = attr.ib(default=True)
372 # trust the base of incoming delta by default
372 # trust the base of incoming delta by default
373 lazy_delta_base = attr.ib(default=False)
373 lazy_delta_base = attr.ib(default=False)
374
374
375
375
376 class _InnerRevlog:
376 class _InnerRevlog:
377 """An inner layer of the revlog object
377 """An inner layer of the revlog object
378
378
379 That layer exist to be able to delegate some operation to Rust, its
379 That layer exist to be able to delegate some operation to Rust, its
380 boundaries are arbitrary and based on what we can delegate to Rust.
380 boundaries are arbitrary and based on what we can delegate to Rust.
381 """
381 """
382
382
383 opener: vfsmod.vfs
383 opener: vfsmod.vfs
384
384
385 def __init__(
385 def __init__(
386 self,
386 self,
387 opener: vfsmod.vfs,
387 opener: vfsmod.vfs,
388 index,
388 index,
389 index_file,
389 index_file,
390 data_file,
390 data_file,
391 sidedata_file,
391 sidedata_file,
392 inline,
392 inline,
393 data_config,
393 data_config,
394 delta_config,
394 delta_config,
395 feature_config,
395 feature_config,
396 chunk_cache,
396 chunk_cache,
397 default_compression_header,
397 default_compression_header,
398 ):
398 ):
399 self.opener = opener
399 self.opener = opener
400 self.index = index
400 self.index = index
401
401
402 self.index_file = index_file
402 self.index_file = index_file
403 self.data_file = data_file
403 self.data_file = data_file
404 self.sidedata_file = sidedata_file
404 self.sidedata_file = sidedata_file
405 self.inline = inline
405 self.inline = inline
406 self.data_config = data_config
406 self.data_config = data_config
407 self.delta_config = delta_config
407 self.delta_config = delta_config
408 self.feature_config = feature_config
408 self.feature_config = feature_config
409
409
410 # used during diverted write.
410 # used during diverted write.
411 self._orig_index_file = None
411 self._orig_index_file = None
412
412
413 self._default_compression_header = default_compression_header
413 self._default_compression_header = default_compression_header
414
414
415 # index
415 # index
416
416
417 # 3-tuple of file handles being used for active writing.
417 # 3-tuple of file handles being used for active writing.
418 self._writinghandles = None
418 self._writinghandles = None
419
419
420 self._segmentfile = randomaccessfile.randomaccessfile(
420 self._segmentfile = randomaccessfile.randomaccessfile(
421 self.opener,
421 self.opener,
422 (self.index_file if self.inline else self.data_file),
422 (self.index_file if self.inline else self.data_file),
423 self.data_config.chunk_cache_size,
423 self.data_config.chunk_cache_size,
424 chunk_cache,
424 chunk_cache,
425 )
425 )
426 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
426 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
427 self.opener,
427 self.opener,
428 self.sidedata_file,
428 self.sidedata_file,
429 self.data_config.chunk_cache_size,
429 self.data_config.chunk_cache_size,
430 )
430 )
431
431
432 # revlog header -> revlog compressor
432 # revlog header -> revlog compressor
433 self._decompressors = {}
433 self._decompressors = {}
434 # 3-tuple of (node, rev, text) for a raw revision.
434 # 3-tuple of (node, rev, text) for a raw revision.
435 self._revisioncache = None
435 self._revisioncache = None
436
436
437 # cache some uncompressed chunks
437 # cache some uncompressed chunks
438 # rev → uncompressed_chunk
438 # rev → uncompressed_chunk
439 #
439 #
440 # the max cost is dynamically updated to be proportionnal to the
440 # the max cost is dynamically updated to be proportionnal to the
441 # size of revision we actually encounter.
441 # size of revision we actually encounter.
442 self._uncompressed_chunk_cache = None
442 self._uncompressed_chunk_cache = None
443 if self.data_config.uncompressed_cache_factor is not None:
443 if self.data_config.uncompressed_cache_factor is not None:
444 self._uncompressed_chunk_cache = util.lrucachedict(
444 self._uncompressed_chunk_cache = util.lrucachedict(
445 self.data_config.uncompressed_cache_count,
445 self.data_config.uncompressed_cache_count,
446 maxcost=65536, # some arbitrary initial value
446 maxcost=65536, # some arbitrary initial value
447 )
447 )
448
448
449 self._delay_buffer = None
449 self._delay_buffer = None
450
450
451 def __len__(self):
451 def __len__(self):
452 return len(self.index)
452 return len(self.index)
453
453
454 def clear_cache(self):
454 def clear_cache(self):
455 assert not self.is_delaying
455 assert not self.is_delaying
456 self._revisioncache = None
456 self._revisioncache = None
457 if self._uncompressed_chunk_cache is not None:
457 if self._uncompressed_chunk_cache is not None:
458 self._uncompressed_chunk_cache.clear()
458 self._uncompressed_chunk_cache.clear()
459 self._segmentfile.clear_cache()
459 self._segmentfile.clear_cache()
460 self._segmentfile_sidedata.clear_cache()
460 self._segmentfile_sidedata.clear_cache()
461
461
462 @property
462 @property
463 def canonical_index_file(self):
463 def canonical_index_file(self):
464 if self._orig_index_file is not None:
464 if self._orig_index_file is not None:
465 return self._orig_index_file
465 return self._orig_index_file
466 return self.index_file
466 return self.index_file
467
467
468 @property
468 @property
469 def is_delaying(self):
469 def is_delaying(self):
470 """is the revlog is currently delaying the visibility of written data?
470 """is the revlog is currently delaying the visibility of written data?
471
471
472 The delaying mechanism can be either in-memory or written on disk in a
472 The delaying mechanism can be either in-memory or written on disk in a
473 side-file."""
473 side-file."""
474 return (self._delay_buffer is not None) or (
474 return (self._delay_buffer is not None) or (
475 self._orig_index_file is not None
475 self._orig_index_file is not None
476 )
476 )
477
477
478 # Derived from index values.
478 # Derived from index values.
479
479
480 def start(self, rev):
480 def start(self, rev):
481 """the offset of the data chunk for this revision"""
481 """the offset of the data chunk for this revision"""
482 return int(self.index[rev][0] >> 16)
482 return int(self.index[rev][0] >> 16)
483
483
484 def length(self, rev):
484 def length(self, rev):
485 """the length of the data chunk for this revision"""
485 """the length of the data chunk for this revision"""
486 return self.index[rev][1]
486 return self.index[rev][1]
487
487
488 def end(self, rev):
488 def end(self, rev):
489 """the end of the data chunk for this revision"""
489 """the end of the data chunk for this revision"""
490 return self.start(rev) + self.length(rev)
490 return self.start(rev) + self.length(rev)
491
491
492 def deltaparent(self, rev):
492 def deltaparent(self, rev):
493 """return deltaparent of the given revision"""
493 """return deltaparent of the given revision"""
494 base = self.index[rev][3]
494 base = self.index[rev][3]
495 if base == rev:
495 if base == rev:
496 return nullrev
496 return nullrev
497 elif self.delta_config.general_delta:
497 elif self.delta_config.general_delta:
498 return base
498 return base
499 else:
499 else:
500 return rev - 1
500 return rev - 1
501
501
502 def issnapshot(self, rev):
502 def issnapshot(self, rev):
503 """tells whether rev is a snapshot"""
503 """tells whether rev is a snapshot"""
504 if not self.delta_config.sparse_revlog:
504 if not self.delta_config.sparse_revlog:
505 return self.deltaparent(rev) == nullrev
505 return self.deltaparent(rev) == nullrev
506 elif hasattr(self.index, 'issnapshot'):
506 elif hasattr(self.index, 'issnapshot'):
507 # directly assign the method to cache the testing and access
507 # directly assign the method to cache the testing and access
508 self.issnapshot = self.index.issnapshot
508 self.issnapshot = self.index.issnapshot
509 return self.issnapshot(rev)
509 return self.issnapshot(rev)
510 if rev == nullrev:
510 if rev == nullrev:
511 return True
511 return True
512 entry = self.index[rev]
512 entry = self.index[rev]
513 base = entry[3]
513 base = entry[3]
514 if base == rev:
514 if base == rev:
515 return True
515 return True
516 if base == nullrev:
516 if base == nullrev:
517 return True
517 return True
518 p1 = entry[5]
518 p1 = entry[5]
519 while self.length(p1) == 0:
519 while self.length(p1) == 0:
520 b = self.deltaparent(p1)
520 b = self.deltaparent(p1)
521 if b == p1:
521 if b == p1:
522 break
522 break
523 p1 = b
523 p1 = b
524 p2 = entry[6]
524 p2 = entry[6]
525 while self.length(p2) == 0:
525 while self.length(p2) == 0:
526 b = self.deltaparent(p2)
526 b = self.deltaparent(p2)
527 if b == p2:
527 if b == p2:
528 break
528 break
529 p2 = b
529 p2 = b
530 if base == p1 or base == p2:
530 if base == p1 or base == p2:
531 return False
531 return False
532 return self.issnapshot(base)
532 return self.issnapshot(base)
533
533
534 def _deltachain(self, rev, stoprev=None):
534 def _deltachain(self, rev, stoprev=None):
535 """Obtain the delta chain for a revision.
535 """Obtain the delta chain for a revision.
536
536
537 ``stoprev`` specifies a revision to stop at. If not specified, we
537 ``stoprev`` specifies a revision to stop at. If not specified, we
538 stop at the base of the chain.
538 stop at the base of the chain.
539
539
540 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
540 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
541 revs in ascending order and ``stopped`` is a bool indicating whether
541 revs in ascending order and ``stopped`` is a bool indicating whether
542 ``stoprev`` was hit.
542 ``stoprev`` was hit.
543 """
543 """
544 generaldelta = self.delta_config.general_delta
544 generaldelta = self.delta_config.general_delta
545 # Try C implementation.
545 # Try C implementation.
546 try:
546 try:
547 return self.index.deltachain(rev, stoprev, generaldelta)
547 return self.index.deltachain(rev, stoprev, generaldelta)
548 except AttributeError:
548 except AttributeError:
549 pass
549 pass
550
550
551 chain = []
551 chain = []
552
552
553 # Alias to prevent attribute lookup in tight loop.
553 # Alias to prevent attribute lookup in tight loop.
554 index = self.index
554 index = self.index
555
555
556 iterrev = rev
556 iterrev = rev
557 e = index[iterrev]
557 e = index[iterrev]
558 while iterrev != e[3] and iterrev != stoprev:
558 while iterrev != e[3] and iterrev != stoprev:
559 chain.append(iterrev)
559 chain.append(iterrev)
560 if generaldelta:
560 if generaldelta:
561 iterrev = e[3]
561 iterrev = e[3]
562 else:
562 else:
563 iterrev -= 1
563 iterrev -= 1
564 e = index[iterrev]
564 e = index[iterrev]
565
565
566 if iterrev == stoprev:
566 if iterrev == stoprev:
567 stopped = True
567 stopped = True
568 else:
568 else:
569 chain.append(iterrev)
569 chain.append(iterrev)
570 stopped = False
570 stopped = False
571
571
572 chain.reverse()
572 chain.reverse()
573 return chain, stopped
573 return chain, stopped
574
574
575 @util.propertycache
575 @util.propertycache
576 def _compressor(self):
576 def _compressor(self):
577 engine = util.compengines[self.feature_config.compression_engine]
577 engine = util.compengines[self.feature_config.compression_engine]
578 return engine.revlogcompressor(
578 return engine.revlogcompressor(
579 self.feature_config.compression_engine_options
579 self.feature_config.compression_engine_options
580 )
580 )
581
581
582 @util.propertycache
582 @util.propertycache
583 def _decompressor(self):
583 def _decompressor(self):
584 """the default decompressor"""
584 """the default decompressor"""
585 if self._default_compression_header is None:
585 if self._default_compression_header is None:
586 return None
586 return None
587 t = self._default_compression_header
587 t = self._default_compression_header
588 c = self._get_decompressor(t)
588 c = self._get_decompressor(t)
589 return c.decompress
589 return c.decompress
590
590
591 def _get_decompressor(self, t: bytes):
591 def _get_decompressor(self, t: bytes):
592 try:
592 try:
593 compressor = self._decompressors[t]
593 compressor = self._decompressors[t]
594 except KeyError:
594 except KeyError:
595 try:
595 try:
596 engine = util.compengines.forrevlogheader(t)
596 engine = util.compengines.forrevlogheader(t)
597 compressor = engine.revlogcompressor(
597 compressor = engine.revlogcompressor(
598 self.feature_config.compression_engine_options
598 self.feature_config.compression_engine_options
599 )
599 )
600 self._decompressors[t] = compressor
600 self._decompressors[t] = compressor
601 except KeyError:
601 except KeyError:
602 raise error.RevlogError(
602 raise error.RevlogError(
603 _(b'unknown compression type %s') % binascii.hexlify(t)
603 _(b'unknown compression type %s') % binascii.hexlify(t)
604 )
604 )
605 return compressor
605 return compressor
606
606
607 def compress(self, data: bytes) -> Tuple[bytes, bytes]:
607 def compress(self, data: bytes) -> Tuple[bytes, bytes]:
608 """Generate a possibly-compressed representation of data."""
608 """Generate a possibly-compressed representation of data."""
609 if not data:
609 if not data:
610 return b'', data
610 return b'', data
611
611
612 compressed = self._compressor.compress(data)
612 compressed = self._compressor.compress(data)
613
613
614 if compressed:
614 if compressed:
615 # The revlog compressor added the header in the returned data.
615 # The revlog compressor added the header in the returned data.
616 return b'', compressed
616 return b'', compressed
617
617
618 if data[0:1] == b'\0':
618 if data[0:1] == b'\0':
619 return b'', data
619 return b'', data
620 return b'u', data
620 return b'u', data
621
621
622 def decompress(self, data: bytes):
622 def decompress(self, data: bytes):
623 """Decompress a revlog chunk.
623 """Decompress a revlog chunk.
624
624
625 The chunk is expected to begin with a header identifying the
625 The chunk is expected to begin with a header identifying the
626 format type so it can be routed to an appropriate decompressor.
626 format type so it can be routed to an appropriate decompressor.
627 """
627 """
628 if not data:
628 if not data:
629 return data
629 return data
630
630
631 # Revlogs are read much more frequently than they are written and many
631 # Revlogs are read much more frequently than they are written and many
632 # chunks only take microseconds to decompress, so performance is
632 # chunks only take microseconds to decompress, so performance is
633 # important here.
633 # important here.
634 #
634 #
635 # We can make a few assumptions about revlogs:
635 # We can make a few assumptions about revlogs:
636 #
636 #
637 # 1) the majority of chunks will be compressed (as opposed to inline
637 # 1) the majority of chunks will be compressed (as opposed to inline
638 # raw data).
638 # raw data).
639 # 2) decompressing *any* data will likely by at least 10x slower than
639 # 2) decompressing *any* data will likely by at least 10x slower than
640 # returning raw inline data.
640 # returning raw inline data.
641 # 3) we want to prioritize common and officially supported compression
641 # 3) we want to prioritize common and officially supported compression
642 # engines
642 # engines
643 #
643 #
644 # It follows that we want to optimize for "decompress compressed data
644 # It follows that we want to optimize for "decompress compressed data
645 # when encoded with common and officially supported compression engines"
645 # when encoded with common and officially supported compression engines"
646 # case over "raw data" and "data encoded by less common or non-official
646 # case over "raw data" and "data encoded by less common or non-official
647 # compression engines." That is why we have the inline lookup first
647 # compression engines." That is why we have the inline lookup first
648 # followed by the compengines lookup.
648 # followed by the compengines lookup.
649 #
649 #
650 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
650 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
651 # compressed chunks. And this matters for changelog and manifest reads.
651 # compressed chunks. And this matters for changelog and manifest reads.
652 t = data[0:1]
652 t = data[0:1]
653
653
654 if t == b'x':
654 if t == b'x':
655 try:
655 try:
656 return _zlibdecompress(data)
656 return _zlibdecompress(data)
657 except zlib.error as e:
657 except zlib.error as e:
658 raise error.RevlogError(
658 raise error.RevlogError(
659 _(b'revlog decompress error: %s')
659 _(b'revlog decompress error: %s')
660 % stringutil.forcebytestr(e)
660 % stringutil.forcebytestr(e)
661 )
661 )
662 # '\0' is more common than 'u' so it goes first.
662 # '\0' is more common than 'u' so it goes first.
663 elif t == b'\0':
663 elif t == b'\0':
664 return data
664 return data
665 elif t == b'u':
665 elif t == b'u':
666 return util.buffer(data, 1)
666 return util.buffer(data, 1)
667
667
668 compressor = self._get_decompressor(t)
668 compressor = self._get_decompressor(t)
669
669
670 return compressor.decompress(data)
670 return compressor.decompress(data)
671
671
672 @contextlib.contextmanager
672 @contextlib.contextmanager
673 def reading(self):
673 def reading(self):
674 """Context manager that keeps data and sidedata files open for reading"""
674 """Context manager that keeps data and sidedata files open for reading"""
675 if len(self.index) == 0:
675 if len(self.index) == 0:
676 yield # nothing to be read
676 yield # nothing to be read
677 elif self._delay_buffer is not None and self.inline:
677 elif self._delay_buffer is not None and self.inline:
678 msg = "revlog with delayed write should not be inline"
678 msg = "revlog with delayed write should not be inline"
679 raise error.ProgrammingError(msg)
679 raise error.ProgrammingError(msg)
680 else:
680 else:
681 with self._segmentfile.reading():
681 with self._segmentfile.reading():
682 with self._segmentfile_sidedata.reading():
682 with self._segmentfile_sidedata.reading():
683 yield
683 yield
684
684
685 @property
685 @property
686 def is_writing(self):
686 def is_writing(self):
687 """True is a writing context is open"""
687 """True is a writing context is open"""
688 return self._writinghandles is not None
688 return self._writinghandles is not None
689
689
690 @property
690 @property
691 def is_open(self):
691 def is_open(self):
692 """True if any file handle is being held
692 """True if any file handle is being held
693
693
694 Used for assert and debug in the python code"""
694 Used for assert and debug in the python code"""
695 return self._segmentfile.is_open or self._segmentfile_sidedata.is_open
695 return self._segmentfile.is_open or self._segmentfile_sidedata.is_open
696
696
697 @contextlib.contextmanager
697 @contextlib.contextmanager
698 def writing(self, transaction, data_end=None, sidedata_end=None):
698 def writing(self, transaction, data_end=None, sidedata_end=None):
699 """Open the revlog files for writing
699 """Open the revlog files for writing
700
700
701 Add content to a revlog should be done within such context.
701 Add content to a revlog should be done within such context.
702 """
702 """
703 if self.is_writing:
703 if self.is_writing:
704 yield
704 yield
705 else:
705 else:
706 ifh = dfh = sdfh = None
706 ifh = dfh = sdfh = None
707 try:
707 try:
708 r = len(self.index)
708 r = len(self.index)
709 # opening the data file.
709 # opening the data file.
710 dsize = 0
710 dsize = 0
711 if r:
711 if r:
712 dsize = self.end(r - 1)
712 dsize = self.end(r - 1)
713 dfh = None
713 dfh = None
714 if not self.inline:
714 if not self.inline:
715 try:
715 try:
716 dfh = self.opener(self.data_file, mode=b"r+")
716 dfh = self.opener(self.data_file, mode=b"r+")
717 if data_end is None:
717 if data_end is None:
718 dfh.seek(0, os.SEEK_END)
718 dfh.seek(0, os.SEEK_END)
719 else:
719 else:
720 dfh.seek(data_end, os.SEEK_SET)
720 dfh.seek(data_end, os.SEEK_SET)
721 except FileNotFoundError:
721 except FileNotFoundError:
722 dfh = self.opener(self.data_file, mode=b"w+")
722 dfh = self.opener(self.data_file, mode=b"w+")
723 transaction.add(self.data_file, dsize)
723 transaction.add(self.data_file, dsize)
724 if self.sidedata_file is not None:
724 if self.sidedata_file is not None:
725 assert sidedata_end is not None
725 assert sidedata_end is not None
726 # revlog-v2 does not inline, help Pytype
726 # revlog-v2 does not inline, help Pytype
727 assert dfh is not None
727 assert dfh is not None
728 try:
728 try:
729 sdfh = self.opener(self.sidedata_file, mode=b"r+")
729 sdfh = self.opener(self.sidedata_file, mode=b"r+")
730 dfh.seek(sidedata_end, os.SEEK_SET)
730 dfh.seek(sidedata_end, os.SEEK_SET)
731 except FileNotFoundError:
731 except FileNotFoundError:
732 sdfh = self.opener(self.sidedata_file, mode=b"w+")
732 sdfh = self.opener(self.sidedata_file, mode=b"w+")
733 transaction.add(self.sidedata_file, sidedata_end)
733 transaction.add(self.sidedata_file, sidedata_end)
734
734
735 # opening the index file.
735 # opening the index file.
736 isize = r * self.index.entry_size
736 isize = r * self.index.entry_size
737 ifh = self.__index_write_fp()
737 ifh = self.__index_write_fp()
738 if self.inline:
738 if self.inline:
739 transaction.add(self.index_file, dsize + isize)
739 transaction.add(self.index_file, dsize + isize)
740 else:
740 else:
741 transaction.add(self.index_file, isize)
741 transaction.add(self.index_file, isize)
742 # exposing all file handle for writing.
742 # exposing all file handle for writing.
743 self._writinghandles = (ifh, dfh, sdfh)
743 self._writinghandles = (ifh, dfh, sdfh)
744 self._segmentfile.writing_handle = ifh if self.inline else dfh
744 self._segmentfile.writing_handle = ifh if self.inline else dfh
745 self._segmentfile_sidedata.writing_handle = sdfh
745 self._segmentfile_sidedata.writing_handle = sdfh
746 yield
746 yield
747 finally:
747 finally:
748 self._writinghandles = None
748 self._writinghandles = None
749 self._segmentfile.writing_handle = None
749 self._segmentfile.writing_handle = None
750 self._segmentfile_sidedata.writing_handle = None
750 self._segmentfile_sidedata.writing_handle = None
751 if dfh is not None:
751 if dfh is not None:
752 dfh.close()
752 dfh.close()
753 if sdfh is not None:
753 if sdfh is not None:
754 sdfh.close()
754 sdfh.close()
755 # closing the index file last to avoid exposing referent to
755 # closing the index file last to avoid exposing referent to
756 # potential unflushed data content.
756 # potential unflushed data content.
757 if ifh is not None:
757 if ifh is not None:
758 ifh.close()
758 ifh.close()
759
759
760 def __index_write_fp(self, index_end=None):
760 def __index_write_fp(self, index_end=None):
761 """internal method to open the index file for writing
761 """internal method to open the index file for writing
762
762
763 You should not use this directly and use `_writing` instead
763 You should not use this directly and use `_writing` instead
764 """
764 """
765 try:
765 try:
766 if self._delay_buffer is None:
766 if self._delay_buffer is None:
767 f = self.opener(
767 f = self.opener(
768 self.index_file,
768 self.index_file,
769 mode=b"r+",
769 mode=b"r+",
770 checkambig=self.data_config.check_ambig,
770 checkambig=self.data_config.check_ambig,
771 )
771 )
772 else:
772 else:
773 # check_ambig affect we way we open file for writing, however
773 # check_ambig affect we way we open file for writing, however
774 # here, we do not actually open a file for writting as write
774 # here, we do not actually open a file for writting as write
775 # will appened to a delay_buffer. So check_ambig is not
775 # will appened to a delay_buffer. So check_ambig is not
776 # meaningful and unneeded here.
776 # meaningful and unneeded here.
777 f = randomaccessfile.appender(
777 f = randomaccessfile.appender(
778 self.opener, self.index_file, b"r+", self._delay_buffer
778 self.opener, self.index_file, b"r+", self._delay_buffer
779 )
779 )
780 if index_end is None:
780 if index_end is None:
781 f.seek(0, os.SEEK_END)
781 f.seek(0, os.SEEK_END)
782 else:
782 else:
783 f.seek(index_end, os.SEEK_SET)
783 f.seek(index_end, os.SEEK_SET)
784 return f
784 return f
785 except FileNotFoundError:
785 except FileNotFoundError:
786 if self._delay_buffer is None:
786 if self._delay_buffer is None:
787 return self.opener(
787 return self.opener(
788 self.index_file,
788 self.index_file,
789 mode=b"w+",
789 mode=b"w+",
790 checkambig=self.data_config.check_ambig,
790 checkambig=self.data_config.check_ambig,
791 )
791 )
792 else:
792 else:
793 return randomaccessfile.appender(
793 return randomaccessfile.appender(
794 self.opener, self.index_file, b"w+", self._delay_buffer
794 self.opener, self.index_file, b"w+", self._delay_buffer
795 )
795 )
796
796
797 def __index_new_fp(self):
797 def __index_new_fp(self):
798 """internal method to create a new index file for writing
798 """internal method to create a new index file for writing
799
799
800 You should not use this unless you are upgrading from inline revlog
800 You should not use this unless you are upgrading from inline revlog
801 """
801 """
802 return self.opener(
802 return self.opener(
803 self.index_file,
803 self.index_file,
804 mode=b"w",
804 mode=b"w",
805 checkambig=self.data_config.check_ambig,
805 checkambig=self.data_config.check_ambig,
806 )
806 )
807
807
808 def split_inline(self, tr, header, new_index_file_path=None):
808 def split_inline(self, tr, header, new_index_file_path=None):
809 """split the data of an inline revlog into an index and a data file"""
809 """split the data of an inline revlog into an index and a data file"""
810 assert self._delay_buffer is None
810 assert self._delay_buffer is None
811 existing_handles = False
811 existing_handles = False
812 if self._writinghandles is not None:
812 if self._writinghandles is not None:
813 existing_handles = True
813 existing_handles = True
814 fp = self._writinghandles[0]
814 fp = self._writinghandles[0]
815 fp.flush()
815 fp.flush()
816 fp.close()
816 fp.close()
817 # We can't use the cached file handle after close(). So prevent
817 # We can't use the cached file handle after close(). So prevent
818 # its usage.
818 # its usage.
819 self._writinghandles = None
819 self._writinghandles = None
820 self._segmentfile.writing_handle = None
820 self._segmentfile.writing_handle = None
821 # No need to deal with sidedata writing handle as it is only
821 # No need to deal with sidedata writing handle as it is only
822 # relevant with revlog-v2 which is never inline, not reaching
822 # relevant with revlog-v2 which is never inline, not reaching
823 # this code
823 # this code
824
824
825 new_dfh = self.opener(self.data_file, mode=b"w+")
825 new_dfh = self.opener(self.data_file, mode=b"w+")
826 new_dfh.truncate(0) # drop any potentially existing data
826 new_dfh.truncate(0) # drop any potentially existing data
827 try:
827 try:
828 with self.reading():
828 with self.reading():
829 for r in range(len(self.index)):
829 for r in range(len(self.index)):
830 new_dfh.write(self.get_segment_for_revs(r, r)[1])
830 new_dfh.write(self.get_segment_for_revs(r, r)[1])
831 new_dfh.flush()
831 new_dfh.flush()
832
832
833 if new_index_file_path is not None:
833 if new_index_file_path is not None:
834 self.index_file = new_index_file_path
834 self.index_file = new_index_file_path
835 with self.__index_new_fp() as fp:
835 with self.__index_new_fp() as fp:
836 self.inline = False
836 self.inline = False
837 for i in range(len(self.index)):
837 for i in range(len(self.index)):
838 e = self.index.entry_binary(i)
838 e = self.index.entry_binary(i)
839 if i == 0:
839 if i == 0:
840 packed_header = self.index.pack_header(header)
840 packed_header = self.index.pack_header(header)
841 e = packed_header + e
841 e = packed_header + e
842 fp.write(e)
842 fp.write(e)
843
843
844 # If we don't use side-write, the temp file replace the real
844 # If we don't use side-write, the temp file replace the real
845 # index when we exit the context manager
845 # index when we exit the context manager
846
846
847 self._segmentfile = randomaccessfile.randomaccessfile(
847 self._segmentfile = randomaccessfile.randomaccessfile(
848 self.opener,
848 self.opener,
849 self.data_file,
849 self.data_file,
850 self.data_config.chunk_cache_size,
850 self.data_config.chunk_cache_size,
851 )
851 )
852
852
853 if existing_handles:
853 if existing_handles:
854 # switched from inline to conventional reopen the index
854 # switched from inline to conventional reopen the index
855 ifh = self.__index_write_fp()
855 ifh = self.__index_write_fp()
856 self._writinghandles = (ifh, new_dfh, None)
856 self._writinghandles = (ifh, new_dfh, None)
857 self._segmentfile.writing_handle = new_dfh
857 self._segmentfile.writing_handle = new_dfh
858 new_dfh = None
858 new_dfh = None
859 # No need to deal with sidedata writing handle as it is only
859 # No need to deal with sidedata writing handle as it is only
860 # relevant with revlog-v2 which is never inline, not reaching
860 # relevant with revlog-v2 which is never inline, not reaching
861 # this code
861 # this code
862 finally:
862 finally:
863 if new_dfh is not None:
863 if new_dfh is not None:
864 new_dfh.close()
864 new_dfh.close()
865 return self.index_file
865 return self.index_file
866
866
867 def get_segment_for_revs(self, startrev, endrev):
867 def get_segment_for_revs(self, startrev, endrev):
868 """Obtain a segment of raw data corresponding to a range of revisions.
868 """Obtain a segment of raw data corresponding to a range of revisions.
869
869
870 Accepts the start and end revisions and an optional already-open
870 Accepts the start and end revisions and an optional already-open
871 file handle to be used for reading. If the file handle is read, its
871 file handle to be used for reading. If the file handle is read, its
872 seek position will not be preserved.
872 seek position will not be preserved.
873
873
874 Requests for data may be satisfied by a cache.
874 Requests for data may be satisfied by a cache.
875
875
876 Returns a 2-tuple of (offset, data) for the requested range of
876 Returns a 2-tuple of (offset, data) for the requested range of
877 revisions. Offset is the integer offset from the beginning of the
877 revisions. Offset is the integer offset from the beginning of the
878 revlog and data is a str or buffer of the raw byte data.
878 revlog and data is a str or buffer of the raw byte data.
879
879
880 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
880 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
881 to determine where each revision's data begins and ends.
881 to determine where each revision's data begins and ends.
882
882
883 API: we should consider making this a private part of the InnerRevlog
883 API: we should consider making this a private part of the InnerRevlog
884 at some point.
884 at some point.
885 """
885 """
886 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
886 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
887 # (functions are expensive).
887 # (functions are expensive).
888 index = self.index
888 index = self.index
889 istart = index[startrev]
889 istart = index[startrev]
890 start = int(istart[0] >> 16)
890 start = int(istart[0] >> 16)
891 if startrev == endrev:
891 if startrev == endrev:
892 end = start + istart[1]
892 end = start + istart[1]
893 else:
893 else:
894 iend = index[endrev]
894 iend = index[endrev]
895 end = int(iend[0] >> 16) + iend[1]
895 end = int(iend[0] >> 16) + iend[1]
896
896
897 if self.inline:
897 if self.inline:
898 start += (startrev + 1) * self.index.entry_size
898 start += (startrev + 1) * self.index.entry_size
899 end += (endrev + 1) * self.index.entry_size
899 end += (endrev + 1) * self.index.entry_size
900 length = end - start
900 length = end - start
901
901
902 return start, self._segmentfile.read_chunk(start, length)
902 return start, self._segmentfile.read_chunk(start, length)
903
903
904 def _chunk(self, rev):
904 def _chunk(self, rev):
905 """Obtain a single decompressed chunk for a revision.
905 """Obtain a single decompressed chunk for a revision.
906
906
907 Accepts an integer revision and an optional already-open file handle
907 Accepts an integer revision and an optional already-open file handle
908 to be used for reading. If used, the seek position of the file will not
908 to be used for reading. If used, the seek position of the file will not
909 be preserved.
909 be preserved.
910
910
911 Returns a str holding uncompressed data for the requested revision.
911 Returns a str holding uncompressed data for the requested revision.
912 """
912 """
913 if self._uncompressed_chunk_cache is not None:
913 if self._uncompressed_chunk_cache is not None:
914 uncomp = self._uncompressed_chunk_cache.get(rev)
914 uncomp = self._uncompressed_chunk_cache.get(rev)
915 if uncomp is not None:
915 if uncomp is not None:
916 return uncomp
916 return uncomp
917
917
918 compression_mode = self.index[rev][10]
918 compression_mode = self.index[rev][10]
919 data = self.get_segment_for_revs(rev, rev)[1]
919 data = self.get_segment_for_revs(rev, rev)[1]
920 if compression_mode == COMP_MODE_PLAIN:
920 if compression_mode == COMP_MODE_PLAIN:
921 uncomp = data
921 uncomp = data
922 elif compression_mode == COMP_MODE_DEFAULT:
922 elif compression_mode == COMP_MODE_DEFAULT:
923 uncomp = self._decompressor(data)
923 uncomp = self._decompressor(data)
924 elif compression_mode == COMP_MODE_INLINE:
924 elif compression_mode == COMP_MODE_INLINE:
925 uncomp = self.decompress(data)
925 uncomp = self.decompress(data)
926 else:
926 else:
927 msg = b'unknown compression mode %d'
927 msg = b'unknown compression mode %d'
928 msg %= compression_mode
928 msg %= compression_mode
929 raise error.RevlogError(msg)
929 raise error.RevlogError(msg)
930 if self._uncompressed_chunk_cache is not None:
930 if self._uncompressed_chunk_cache is not None:
931 self._uncompressed_chunk_cache.insert(rev, uncomp, cost=len(uncomp))
931 self._uncompressed_chunk_cache.insert(rev, uncomp, cost=len(uncomp))
932 return uncomp
932 return uncomp
933
933
934 def _chunks(self, revs, targetsize=None):
934 def _chunks(self, revs, targetsize=None):
935 """Obtain decompressed chunks for the specified revisions.
935 """Obtain decompressed chunks for the specified revisions.
936
936
937 Accepts an iterable of numeric revisions that are assumed to be in
937 Accepts an iterable of numeric revisions that are assumed to be in
938 ascending order.
938 ascending order.
939
939
940 This function is similar to calling ``self._chunk()`` multiple times,
940 This function is similar to calling ``self._chunk()`` multiple times,
941 but is faster.
941 but is faster.
942
942
943 Returns a list with decompressed data for each requested revision.
943 Returns a list with decompressed data for each requested revision.
944 """
944 """
945 if not revs:
945 if not revs:
946 return []
946 return []
947 start = self.start
947 start = self.start
948 length = self.length
948 length = self.length
949 inline = self.inline
949 inline = self.inline
950 iosize = self.index.entry_size
950 iosize = self.index.entry_size
951 buffer = util.buffer
951 buffer = util.buffer
952
952
953 fetched_revs = []
953 fetched_revs = []
954 fadd = fetched_revs.append
954 fadd = fetched_revs.append
955
955
956 chunks = []
956 chunks = []
957 ladd = chunks.append
957 ladd = chunks.append
958
958
959 if self._uncompressed_chunk_cache is None:
959 if self._uncompressed_chunk_cache is None:
960 fetched_revs = revs
960 fetched_revs = revs
961 else:
961 else:
962 for rev in revs:
962 for rev in revs:
963 cached_value = self._uncompressed_chunk_cache.get(rev)
963 cached_value = self._uncompressed_chunk_cache.get(rev)
964 if cached_value is None:
964 if cached_value is None:
965 fadd(rev)
965 fadd(rev)
966 else:
966 else:
967 ladd((rev, cached_value))
967 ladd((rev, cached_value))
968
968
969 if not fetched_revs:
969 if not fetched_revs:
970 slicedchunks = ()
970 slicedchunks = ()
971 elif not self.data_config.with_sparse_read:
971 elif not self.data_config.with_sparse_read:
972 slicedchunks = (fetched_revs,)
972 slicedchunks = (fetched_revs,)
973 else:
973 else:
974 slicedchunks = deltautil.slicechunk(
974 slicedchunks = deltautil.slicechunk(
975 self,
975 self,
976 fetched_revs,
976 fetched_revs,
977 targetsize=targetsize,
977 targetsize=targetsize,
978 )
978 )
979
979
980 for revschunk in slicedchunks:
980 for revschunk in slicedchunks:
981 firstrev = revschunk[0]
981 firstrev = revschunk[0]
982 # Skip trailing revisions with empty diff
982 # Skip trailing revisions with empty diff
983 for lastrev in revschunk[::-1]:
983 for lastrev in revschunk[::-1]:
984 if length(lastrev) != 0:
984 if length(lastrev) != 0:
985 break
985 break
986
986
987 try:
987 try:
988 offset, data = self.get_segment_for_revs(firstrev, lastrev)
988 offset, data = self.get_segment_for_revs(firstrev, lastrev)
989 except OverflowError:
989 except OverflowError:
990 # issue4215 - we can't cache a run of chunks greater than
990 # issue4215 - we can't cache a run of chunks greater than
991 # 2G on Windows
991 # 2G on Windows
992 for rev in revschunk:
992 for rev in revschunk:
993 ladd((rev, self._chunk(rev)))
993 ladd((rev, self._chunk(rev)))
994
994
995 decomp = self.decompress
995 decomp = self.decompress
996 # self._decompressor might be None, but will not be used in that case
996 # self._decompressor might be None, but will not be used in that case
997 def_decomp = self._decompressor
997 def_decomp = self._decompressor
998 for rev in revschunk:
998 for rev in revschunk:
999 chunkstart = start(rev)
999 chunkstart = start(rev)
1000 if inline:
1000 if inline:
1001 chunkstart += (rev + 1) * iosize
1001 chunkstart += (rev + 1) * iosize
1002 chunklength = length(rev)
1002 chunklength = length(rev)
1003 comp_mode = self.index[rev][10]
1003 comp_mode = self.index[rev][10]
1004 c = buffer(data, chunkstart - offset, chunklength)
1004 c = buffer(data, chunkstart - offset, chunklength)
1005 if comp_mode == COMP_MODE_PLAIN:
1005 if comp_mode == COMP_MODE_PLAIN:
1006 c = c
1006 c = c
1007 elif comp_mode == COMP_MODE_INLINE:
1007 elif comp_mode == COMP_MODE_INLINE:
1008 c = decomp(c)
1008 c = decomp(c)
1009 elif comp_mode == COMP_MODE_DEFAULT:
1009 elif comp_mode == COMP_MODE_DEFAULT:
1010 c = def_decomp(c)
1010 c = def_decomp(c)
1011 else:
1011 else:
1012 msg = b'unknown compression mode %d'
1012 msg = b'unknown compression mode %d'
1013 msg %= comp_mode
1013 msg %= comp_mode
1014 raise error.RevlogError(msg)
1014 raise error.RevlogError(msg)
1015 ladd((rev, c))
1015 ladd((rev, c))
1016 if self._uncompressed_chunk_cache is not None:
1016 if self._uncompressed_chunk_cache is not None:
1017 self._uncompressed_chunk_cache.insert(rev, c, len(c))
1017 self._uncompressed_chunk_cache.insert(rev, c, len(c))
1018
1018
1019 chunks.sort()
1019 chunks.sort()
1020 return [x[1] for x in chunks]
1020 return [x[1] for x in chunks]
1021
1021
1022 def raw_text(self, node, rev):
1022 def raw_text(self, node, rev):
1023 """return the possibly unvalidated rawtext for a revision
1023 """return the possibly unvalidated rawtext for a revision
1024
1024
1025 returns rawtext
1025 returns rawtext
1026 """
1026 """
1027
1027
1028 # revision in the cache (could be useful to apply delta)
1028 # revision in the cache (could be useful to apply delta)
1029 cachedrev = None
1029 cachedrev = None
1030 # An intermediate text to apply deltas to
1030 # An intermediate text to apply deltas to
1031 basetext = None
1031 basetext = None
1032
1032
1033 # Check if we have the entry in cache
1033 # Check if we have the entry in cache
1034 # The cache entry looks like (node, rev, rawtext)
1034 # The cache entry looks like (node, rev, rawtext)
1035 if self._revisioncache:
1035 if self._revisioncache:
1036 cachedrev = self._revisioncache[1]
1036 cachedrev = self._revisioncache[1]
1037
1037
1038 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1038 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1039 if stopped:
1039 if stopped:
1040 basetext = self._revisioncache[2]
1040 basetext = self._revisioncache[2]
1041
1041
1042 # drop cache to save memory, the caller is expected to
1042 # drop cache to save memory, the caller is expected to
1043 # update self._inner._revisioncache after validating the text
1043 # update self._inner._revisioncache after validating the text
1044 self._revisioncache = None
1044 self._revisioncache = None
1045
1045
1046 targetsize = None
1046 targetsize = None
1047 rawsize = self.index[rev][2]
1047 rawsize = self.index[rev][2]
1048 if 0 <= rawsize:
1048 if 0 <= rawsize:
1049 targetsize = 4 * rawsize
1049 targetsize = 4 * rawsize
1050
1050
1051 if self._uncompressed_chunk_cache is not None:
1051 if self._uncompressed_chunk_cache is not None:
1052 # dynamically update the uncompressed_chunk_cache size to the
1052 # dynamically update the uncompressed_chunk_cache size to the
1053 # largest revision we saw in this revlog.
1053 # largest revision we saw in this revlog.
1054 factor = self.data_config.uncompressed_cache_factor
1054 factor = self.data_config.uncompressed_cache_factor
1055 candidate_size = rawsize * factor
1055 candidate_size = rawsize * factor
1056 if candidate_size > self._uncompressed_chunk_cache.maxcost:
1056 if candidate_size > self._uncompressed_chunk_cache.maxcost:
1057 self._uncompressed_chunk_cache.maxcost = candidate_size
1057 self._uncompressed_chunk_cache.maxcost = candidate_size
1058
1058
1059 bins = self._chunks(chain, targetsize=targetsize)
1059 bins = self._chunks(chain, targetsize=targetsize)
1060 if basetext is None:
1060 if basetext is None:
1061 basetext = bytes(bins[0])
1061 basetext = bytes(bins[0])
1062 bins = bins[1:]
1062 bins = bins[1:]
1063
1063
1064 rawtext = mdiff.patches(basetext, bins)
1064 rawtext = mdiff.patches(basetext, bins)
1065 del basetext # let us have a chance to free memory early
1065 del basetext # let us have a chance to free memory early
1066 return rawtext
1066 return rawtext
1067
1067
1068 def sidedata(self, rev, sidedata_end):
1068 def sidedata(self, rev, sidedata_end):
1069 """Return the sidedata for a given revision number."""
1069 """Return the sidedata for a given revision number."""
1070 index_entry = self.index[rev]
1070 index_entry = self.index[rev]
1071 sidedata_offset = index_entry[8]
1071 sidedata_offset = index_entry[8]
1072 sidedata_size = index_entry[9]
1072 sidedata_size = index_entry[9]
1073
1073
1074 if self.inline:
1074 if self.inline:
1075 sidedata_offset += self.index.entry_size * (1 + rev)
1075 sidedata_offset += self.index.entry_size * (1 + rev)
1076 if sidedata_size == 0:
1076 if sidedata_size == 0:
1077 return {}
1077 return {}
1078
1078
1079 if sidedata_end < sidedata_offset + sidedata_size:
1079 if sidedata_end < sidedata_offset + sidedata_size:
1080 filename = self.sidedata_file
1080 filename = self.sidedata_file
1081 end = sidedata_end
1081 end = sidedata_end
1082 offset = sidedata_offset
1082 offset = sidedata_offset
1083 length = sidedata_size
1083 length = sidedata_size
1084 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1084 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1085 raise error.RevlogError(m)
1085 raise error.RevlogError(m)
1086
1086
1087 comp_segment = self._segmentfile_sidedata.read_chunk(
1087 comp_segment = self._segmentfile_sidedata.read_chunk(
1088 sidedata_offset, sidedata_size
1088 sidedata_offset, sidedata_size
1089 )
1089 )
1090
1090
1091 comp = self.index[rev][11]
1091 comp = self.index[rev][11]
1092 if comp == COMP_MODE_PLAIN:
1092 if comp == COMP_MODE_PLAIN:
1093 segment = comp_segment
1093 segment = comp_segment
1094 elif comp == COMP_MODE_DEFAULT:
1094 elif comp == COMP_MODE_DEFAULT:
1095 segment = self._decompressor(comp_segment)
1095 segment = self._decompressor(comp_segment)
1096 elif comp == COMP_MODE_INLINE:
1096 elif comp == COMP_MODE_INLINE:
1097 segment = self.decompress(comp_segment)
1097 segment = self.decompress(comp_segment)
1098 else:
1098 else:
1099 msg = b'unknown compression mode %d'
1099 msg = b'unknown compression mode %d'
1100 msg %= comp
1100 msg %= comp
1101 raise error.RevlogError(msg)
1101 raise error.RevlogError(msg)
1102
1102
1103 sidedata = sidedatautil.deserialize_sidedata(segment)
1103 sidedata = sidedatautil.deserialize_sidedata(segment)
1104 return sidedata
1104 return sidedata
1105
1105
1106 def write_entry(
1106 def write_entry(
1107 self,
1107 self,
1108 transaction,
1108 transaction,
1109 entry,
1109 entry,
1110 data,
1110 data,
1111 link,
1111 link,
1112 offset,
1112 offset,
1113 sidedata,
1113 sidedata,
1114 sidedata_offset,
1114 sidedata_offset,
1115 index_end,
1115 index_end,
1116 data_end,
1116 data_end,
1117 sidedata_end,
1117 sidedata_end,
1118 ):
1118 ):
1119 # Files opened in a+ mode have inconsistent behavior on various
1119 # Files opened in a+ mode have inconsistent behavior on various
1120 # platforms. Windows requires that a file positioning call be made
1120 # platforms. Windows requires that a file positioning call be made
1121 # when the file handle transitions between reads and writes. See
1121 # when the file handle transitions between reads and writes. See
1122 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1122 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1123 # platforms, Python or the platform itself can be buggy. Some versions
1123 # platforms, Python or the platform itself can be buggy. Some versions
1124 # of Solaris have been observed to not append at the end of the file
1124 # of Solaris have been observed to not append at the end of the file
1125 # if the file was seeked to before the end. See issue4943 for more.
1125 # if the file was seeked to before the end. See issue4943 for more.
1126 #
1126 #
1127 # We work around this issue by inserting a seek() before writing.
1127 # We work around this issue by inserting a seek() before writing.
1128 # Note: This is likely not necessary on Python 3. However, because
1128 # Note: This is likely not necessary on Python 3. However, because
1129 # the file handle is reused for reads and may be seeked there, we need
1129 # the file handle is reused for reads and may be seeked there, we need
1130 # to be careful before changing this.
1130 # to be careful before changing this.
1131 if self._writinghandles is None:
1131 if self._writinghandles is None:
1132 msg = b'adding revision outside `revlog._writing` context'
1132 msg = b'adding revision outside `revlog._writing` context'
1133 raise error.ProgrammingError(msg)
1133 raise error.ProgrammingError(msg)
1134 ifh, dfh, sdfh = self._writinghandles
1134 ifh, dfh, sdfh = self._writinghandles
1135 if index_end is None:
1135 if index_end is None:
1136 ifh.seek(0, os.SEEK_END)
1136 ifh.seek(0, os.SEEK_END)
1137 else:
1137 else:
1138 ifh.seek(index_end, os.SEEK_SET)
1138 ifh.seek(index_end, os.SEEK_SET)
1139 if dfh:
1139 if dfh:
1140 if data_end is None:
1140 if data_end is None:
1141 dfh.seek(0, os.SEEK_END)
1141 dfh.seek(0, os.SEEK_END)
1142 else:
1142 else:
1143 dfh.seek(data_end, os.SEEK_SET)
1143 dfh.seek(data_end, os.SEEK_SET)
1144 if sdfh:
1144 if sdfh:
1145 sdfh.seek(sidedata_end, os.SEEK_SET)
1145 sdfh.seek(sidedata_end, os.SEEK_SET)
1146
1146
1147 curr = len(self.index) - 1
1147 curr = len(self.index) - 1
1148 if not self.inline:
1148 if not self.inline:
1149 transaction.add(self.data_file, offset)
1149 transaction.add(self.data_file, offset)
1150 if self.sidedata_file:
1150 if self.sidedata_file:
1151 transaction.add(self.sidedata_file, sidedata_offset)
1151 transaction.add(self.sidedata_file, sidedata_offset)
1152 transaction.add(self.canonical_index_file, curr * len(entry))
1152 transaction.add(self.canonical_index_file, curr * len(entry))
1153 if data[0]:
1153 if data[0]:
1154 dfh.write(data[0])
1154 dfh.write(data[0])
1155 dfh.write(data[1])
1155 dfh.write(data[1])
1156 if sidedata:
1156 if sidedata:
1157 sdfh.write(sidedata)
1157 sdfh.write(sidedata)
1158 if self._delay_buffer is None:
1158 if self._delay_buffer is None:
1159 ifh.write(entry)
1159 ifh.write(entry)
1160 else:
1160 else:
1161 self._delay_buffer.append(entry)
1161 self._delay_buffer.append(entry)
1162 elif self._delay_buffer is not None:
1162 elif self._delay_buffer is not None:
1163 msg = b'invalid delayed write on inline revlog'
1163 msg = b'invalid delayed write on inline revlog'
1164 raise error.ProgrammingError(msg)
1164 raise error.ProgrammingError(msg)
1165 else:
1165 else:
1166 offset += curr * self.index.entry_size
1166 offset += curr * self.index.entry_size
1167 transaction.add(self.canonical_index_file, offset)
1167 transaction.add(self.canonical_index_file, offset)
1168 assert not sidedata
1168 assert not sidedata
1169 ifh.write(entry)
1169 ifh.write(entry)
1170 ifh.write(data[0])
1170 ifh.write(data[0])
1171 ifh.write(data[1])
1171 ifh.write(data[1])
1172 return (
1172 return (
1173 ifh.tell(),
1173 ifh.tell(),
1174 dfh.tell() if dfh else None,
1174 dfh.tell() if dfh else None,
1175 sdfh.tell() if sdfh else None,
1175 sdfh.tell() if sdfh else None,
1176 )
1176 )
1177
1177
1178 def _divert_index(self):
1178 def _divert_index(self):
1179 index_file = self.index_file
1179 index_file = self.index_file
1180 # when we encounter a legacy inline-changelog, split it. However it is
1180 # when we encounter a legacy inline-changelog, split it. However it is
1181 # important to use the expected filename for pending content
1181 # important to use the expected filename for pending content
1182 # (<radix>.a) otherwise hooks won't be seeing the content of the
1182 # (<radix>.a) otherwise hooks won't be seeing the content of the
1183 # pending transaction.
1183 # pending transaction.
1184 if index_file.endswith(b'.s'):
1184 if index_file.endswith(b'.s'):
1185 index_file = self.index_file[:-2]
1185 index_file = self.index_file[:-2]
1186 return index_file + b'.a'
1186 return index_file + b'.a'
1187
1187
1188 def delay(self):
1188 def delay(self):
1189 assert not self.is_open
1189 assert not self.is_open
1190 if self.inline:
1190 if self.inline:
1191 msg = "revlog with delayed write should not be inline"
1191 msg = "revlog with delayed write should not be inline"
1192 raise error.ProgrammingError(msg)
1192 raise error.ProgrammingError(msg)
1193 if self._delay_buffer is not None or self._orig_index_file is not None:
1193 if self._delay_buffer is not None or self._orig_index_file is not None:
1194 # delay or divert already in place
1194 # delay or divert already in place
1195 return None
1195 return None
1196 elif len(self.index) == 0:
1196 elif len(self.index) == 0:
1197 self._orig_index_file = self.index_file
1197 self._orig_index_file = self.index_file
1198 self.index_file = self._divert_index()
1198 self.index_file = self._divert_index()
1199 assert self._orig_index_file is not None
1199 assert self._orig_index_file is not None
1200 assert self.index_file is not None
1200 assert self.index_file is not None
1201 if self.opener.exists(self.index_file):
1201 if self.opener.exists(self.index_file):
1202 self.opener.unlink(self.index_file)
1202 self.opener.unlink(self.index_file)
1203 return self.index_file
1203 return self.index_file
1204 else:
1204 else:
1205 self._delay_buffer = []
1205 self._delay_buffer = []
1206 return None
1206 return None
1207
1207
1208 def write_pending(self):
1208 def write_pending(self):
1209 assert not self.is_open
1209 assert not self.is_open
1210 if self.inline:
1210 if self.inline:
1211 msg = "revlog with delayed write should not be inline"
1211 msg = "revlog with delayed write should not be inline"
1212 raise error.ProgrammingError(msg)
1212 raise error.ProgrammingError(msg)
1213 if self._orig_index_file is not None:
1213 if self._orig_index_file is not None:
1214 return None, True
1214 return None, True
1215 any_pending = False
1215 any_pending = False
1216 pending_index_file = self._divert_index()
1216 pending_index_file = self._divert_index()
1217 if self.opener.exists(pending_index_file):
1217 if self.opener.exists(pending_index_file):
1218 self.opener.unlink(pending_index_file)
1218 self.opener.unlink(pending_index_file)
1219 util.copyfile(
1219 util.copyfile(
1220 self.opener.join(self.index_file),
1220 self.opener.join(self.index_file),
1221 self.opener.join(pending_index_file),
1221 self.opener.join(pending_index_file),
1222 )
1222 )
1223 if self._delay_buffer:
1223 if self._delay_buffer:
1224 with self.opener(pending_index_file, b'r+') as ifh:
1224 with self.opener(pending_index_file, b'r+') as ifh:
1225 ifh.seek(0, os.SEEK_END)
1225 ifh.seek(0, os.SEEK_END)
1226 ifh.write(b"".join(self._delay_buffer))
1226 ifh.write(b"".join(self._delay_buffer))
1227 any_pending = True
1227 any_pending = True
1228 self._delay_buffer = None
1228 self._delay_buffer = None
1229 self._orig_index_file = self.index_file
1229 self._orig_index_file = self.index_file
1230 self.index_file = pending_index_file
1230 self.index_file = pending_index_file
1231 return self.index_file, any_pending
1231 return self.index_file, any_pending
1232
1232
1233 def finalize_pending(self):
1233 def finalize_pending(self):
1234 assert not self.is_open
1234 assert not self.is_open
1235 if self.inline:
1235 if self.inline:
1236 msg = "revlog with delayed write should not be inline"
1236 msg = "revlog with delayed write should not be inline"
1237 raise error.ProgrammingError(msg)
1237 raise error.ProgrammingError(msg)
1238
1238
1239 delay = self._delay_buffer is not None
1239 delay = self._delay_buffer is not None
1240 divert = self._orig_index_file is not None
1240 divert = self._orig_index_file is not None
1241
1241
1242 if delay and divert:
1242 if delay and divert:
1243 assert False, "unreachable"
1243 assert False, "unreachable"
1244 elif delay:
1244 elif delay:
1245 if self._delay_buffer:
1245 if self._delay_buffer:
1246 with self.opener(self.index_file, b'r+') as ifh:
1246 with self.opener(self.index_file, b'r+') as ifh:
1247 ifh.seek(0, os.SEEK_END)
1247 ifh.seek(0, os.SEEK_END)
1248 ifh.write(b"".join(self._delay_buffer))
1248 ifh.write(b"".join(self._delay_buffer))
1249 self._delay_buffer = None
1249 self._delay_buffer = None
1250 elif divert:
1250 elif divert:
1251 if self.opener.exists(self.index_file):
1251 if self.opener.exists(self.index_file):
1252 self.opener.rename(
1252 self.opener.rename(
1253 self.index_file,
1253 self.index_file,
1254 self._orig_index_file,
1254 self._orig_index_file,
1255 checkambig=True,
1255 checkambig=True,
1256 )
1256 )
1257 self.index_file = self._orig_index_file
1257 self.index_file = self._orig_index_file
1258 self._orig_index_file = None
1258 self._orig_index_file = None
1259 else:
1259 else:
1260 msg = b"not delay or divert found on this revlog"
1260 msg = b"not delay or divert found on this revlog"
1261 raise error.ProgrammingError(msg)
1261 raise error.ProgrammingError(msg)
1262 return self.canonical_index_file
1262 return self.canonical_index_file
1263
1263
1264
1264
1265 class revlog:
1265 class revlog:
1266 """
1266 """
1267 the underlying revision storage object
1267 the underlying revision storage object
1268
1268
1269 A revlog consists of two parts, an index and the revision data.
1269 A revlog consists of two parts, an index and the revision data.
1270
1270
1271 The index is a file with a fixed record size containing
1271 The index is a file with a fixed record size containing
1272 information on each revision, including its nodeid (hash), the
1272 information on each revision, including its nodeid (hash), the
1273 nodeids of its parents, the position and offset of its data within
1273 nodeids of its parents, the position and offset of its data within
1274 the data file, and the revision it's based on. Finally, each entry
1274 the data file, and the revision it's based on. Finally, each entry
1275 contains a linkrev entry that can serve as a pointer to external
1275 contains a linkrev entry that can serve as a pointer to external
1276 data.
1276 data.
1277
1277
1278 The revision data itself is a linear collection of data chunks.
1278 The revision data itself is a linear collection of data chunks.
1279 Each chunk represents a revision and is usually represented as a
1279 Each chunk represents a revision and is usually represented as a
1280 delta against the previous chunk. To bound lookup time, runs of
1280 delta against the previous chunk. To bound lookup time, runs of
1281 deltas are limited to about 2 times the length of the original
1281 deltas are limited to about 2 times the length of the original
1282 version data. This makes retrieval of a version proportional to
1282 version data. This makes retrieval of a version proportional to
1283 its size, or O(1) relative to the number of revisions.
1283 its size, or O(1) relative to the number of revisions.
1284
1284
1285 Both pieces of the revlog are written to in an append-only
1285 Both pieces of the revlog are written to in an append-only
1286 fashion, which means we never need to rewrite a file to insert or
1286 fashion, which means we never need to rewrite a file to insert or
1287 remove data, and can use some simple techniques to avoid the need
1287 remove data, and can use some simple techniques to avoid the need
1288 for locking while reading.
1288 for locking while reading.
1289
1289
1290 If checkambig, indexfile is opened with checkambig=True at
1290 If checkambig, indexfile is opened with checkambig=True at
1291 writing, to avoid file stat ambiguity.
1291 writing, to avoid file stat ambiguity.
1292
1292
1293 If mmaplargeindex is True, and an mmapindexthreshold is set, the
1293 If mmaplargeindex is True, and an mmapindexthreshold is set, the
1294 index will be mmapped rather than read if it is larger than the
1294 index will be mmapped rather than read if it is larger than the
1295 configured threshold.
1295 configured threshold.
1296
1296
1297 If censorable is True, the revlog can have censored revisions.
1297 If censorable is True, the revlog can have censored revisions.
1298
1298
1299 If `upperboundcomp` is not None, this is the expected maximal gain from
1299 If `upperboundcomp` is not None, this is the expected maximal gain from
1300 compression for the data content.
1300 compression for the data content.
1301
1301
1302 `concurrencychecker` is an optional function that receives 3 arguments: a
1302 `concurrencychecker` is an optional function that receives 3 arguments: a
1303 file handle, a filename, and an expected position. It should check whether
1303 file handle, a filename, and an expected position. It should check whether
1304 the current position in the file handle is valid, and log/warn/fail (by
1304 the current position in the file handle is valid, and log/warn/fail (by
1305 raising).
1305 raising).
1306
1306
1307 See mercurial/revlogutils/contants.py for details about the content of an
1307 See mercurial/revlogutils/contants.py for details about the content of an
1308 index entry.
1308 index entry.
1309 """
1309 """
1310
1310
1311 _flagserrorclass = error.RevlogError
1311 _flagserrorclass = error.RevlogError
1312
1312
1313 opener: vfsmod.vfs
1313 opener: vfsmod.vfs
1314
1314
1315 @staticmethod
1315 @staticmethod
1316 def is_inline_index(header_bytes):
1316 def is_inline_index(header_bytes):
1317 """Determine if a revlog is inline from the initial bytes of the index"""
1317 """Determine if a revlog is inline from the initial bytes of the index"""
1318 if len(header_bytes) == 0:
1318 if len(header_bytes) == 0:
1319 return True
1319 return True
1320
1320
1321 header = INDEX_HEADER.unpack(header_bytes)[0]
1321 header = INDEX_HEADER.unpack(header_bytes)[0]
1322
1322
1323 _format_flags = header & ~0xFFFF
1323 _format_flags = header & ~0xFFFF
1324 _format_version = header & 0xFFFF
1324 _format_version = header & 0xFFFF
1325
1325
1326 features = FEATURES_BY_VERSION[_format_version]
1326 features = FEATURES_BY_VERSION[_format_version]
1327 return features[b'inline'](_format_flags)
1327 return features[b'inline'](_format_flags)
1328
1328
1329 _docket_file: Optional[bytes]
1329 _docket_file: Optional[bytes]
1330
1330
1331 def __init__(
1331 def __init__(
1332 self,
1332 self,
1333 opener: vfsmod.vfs,
1333 opener: vfsmod.vfs,
1334 target,
1334 target,
1335 radix,
1335 radix,
1336 postfix=None, # only exist for `tmpcensored` now
1336 postfix=None, # only exist for `tmpcensored` now
1337 checkambig=False,
1337 checkambig=False,
1338 mmaplargeindex=False,
1338 mmaplargeindex=False,
1339 censorable=False,
1339 censorable=False,
1340 upperboundcomp=None,
1340 upperboundcomp=None,
1341 persistentnodemap=False,
1341 persistentnodemap=False,
1342 concurrencychecker=None,
1342 concurrencychecker=None,
1343 trypending=False,
1343 trypending=False,
1344 try_split=False,
1344 try_split=False,
1345 canonical_parent_order=True,
1345 canonical_parent_order=True,
1346 data_config=None,
1346 data_config=None,
1347 delta_config=None,
1347 delta_config=None,
1348 feature_config=None,
1348 feature_config=None,
1349 may_inline=True, # may inline new revlog
1349 may_inline=True, # may inline new revlog
1350 ):
1350 ):
1351 """
1351 """
1352 create a revlog object
1352 create a revlog object
1353
1353
1354 opener is a function that abstracts the file opening operation
1354 opener is a function that abstracts the file opening operation
1355 and can be used to implement COW semantics or the like.
1355 and can be used to implement COW semantics or the like.
1356
1356
1357 `target`: a (KIND, ID) tuple that identify the content stored in
1357 `target`: a (KIND, ID) tuple that identify the content stored in
1358 this revlog. It help the rest of the code to understand what the revlog
1358 this revlog. It help the rest of the code to understand what the revlog
1359 is about without having to resort to heuristic and index filename
1359 is about without having to resort to heuristic and index filename
1360 analysis. Note: that this must be reliably be set by normal code, but
1360 analysis. Note: that this must be reliably be set by normal code, but
1361 that test, debug, or performance measurement code might not set this to
1361 that test, debug, or performance measurement code might not set this to
1362 accurate value.
1362 accurate value.
1363 """
1363 """
1364
1364
1365 self.radix = radix
1365 self.radix = radix
1366
1366
1367 self._docket_file = None
1367 self._docket_file = None
1368 self._indexfile = None
1368 self._indexfile = None
1369 self._datafile = None
1369 self._datafile = None
1370 self._sidedatafile = None
1370 self._sidedatafile = None
1371 self._nodemap_file = None
1371 self._nodemap_file = None
1372 self.postfix = postfix
1372 self.postfix = postfix
1373 self._trypending = trypending
1373 self._trypending = trypending
1374 self._try_split = try_split
1374 self._try_split = try_split
1375 self._may_inline = may_inline
1375 self._may_inline = may_inline
1376 self.opener = opener
1376 self.opener = opener
1377 if persistentnodemap:
1377 if persistentnodemap:
1378 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1378 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1379
1379
1380 assert target[0] in ALL_KINDS
1380 assert target[0] in ALL_KINDS
1381 assert len(target) == 2
1381 assert len(target) == 2
1382 self.target = target
1382 self.target = target
1383 if feature_config is not None:
1383 if feature_config is not None:
1384 self.feature_config = feature_config.copy()
1384 self.feature_config = feature_config.copy()
1385 elif b'feature-config' in self.opener.options:
1385 elif b'feature-config' in self.opener.options:
1386 self.feature_config = self.opener.options[b'feature-config'].copy()
1386 self.feature_config = self.opener.options[b'feature-config'].copy()
1387 else:
1387 else:
1388 self.feature_config = FeatureConfig()
1388 self.feature_config = FeatureConfig()
1389 self.feature_config.censorable = censorable
1389 self.feature_config.censorable = censorable
1390 self.feature_config.canonical_parent_order = canonical_parent_order
1390 self.feature_config.canonical_parent_order = canonical_parent_order
1391 if data_config is not None:
1391 if data_config is not None:
1392 self.data_config = data_config.copy()
1392 self.data_config = data_config.copy()
1393 elif b'data-config' in self.opener.options:
1393 elif b'data-config' in self.opener.options:
1394 self.data_config = self.opener.options[b'data-config'].copy()
1394 self.data_config = self.opener.options[b'data-config'].copy()
1395 else:
1395 else:
1396 self.data_config = DataConfig()
1396 self.data_config = DataConfig()
1397 self.data_config.check_ambig = checkambig
1397 self.data_config.check_ambig = checkambig
1398 self.data_config.mmap_large_index = mmaplargeindex
1398 self.data_config.mmap_large_index = mmaplargeindex
1399 if delta_config is not None:
1399 if delta_config is not None:
1400 self.delta_config = delta_config.copy()
1400 self.delta_config = delta_config.copy()
1401 elif b'delta-config' in self.opener.options:
1401 elif b'delta-config' in self.opener.options:
1402 self.delta_config = self.opener.options[b'delta-config'].copy()
1402 self.delta_config = self.opener.options[b'delta-config'].copy()
1403 else:
1403 else:
1404 self.delta_config = DeltaConfig()
1404 self.delta_config = DeltaConfig()
1405 self.delta_config.upper_bound_comp = upperboundcomp
1405 self.delta_config.upper_bound_comp = upperboundcomp
1406
1406
1407 # Maps rev to chain base rev.
1407 # Maps rev to chain base rev.
1408 self._chainbasecache = util.lrucachedict(100)
1408 self._chainbasecache = util.lrucachedict(100)
1409
1409
1410 self.index = None
1410 self.index = None
1411 self._docket = None
1411 self._docket = None
1412 self._nodemap_docket = None
1412 self._nodemap_docket = None
1413 # Mapping of partial identifiers to full nodes.
1413 # Mapping of partial identifiers to full nodes.
1414 self._pcache = {}
1414 self._pcache = {}
1415
1415
1416 # other optionnals features
1416 # other optionnals features
1417
1417
1418 # Make copy of flag processors so each revlog instance can support
1418 # Make copy of flag processors so each revlog instance can support
1419 # custom flags.
1419 # custom flags.
1420 self._flagprocessors = dict(flagutil.flagprocessors)
1420 self._flagprocessors = dict(flagutil.flagprocessors)
1421 # prevent nesting of addgroup
1421 # prevent nesting of addgroup
1422 self._adding_group = None
1422 self._adding_group = None
1423
1423
1424 chunk_cache = self._loadindex()
1424 chunk_cache = self._loadindex()
1425 self._load_inner(chunk_cache)
1425 self._load_inner(chunk_cache)
1426 self._concurrencychecker = concurrencychecker
1426 self._concurrencychecker = concurrencychecker
1427
1427
1428 def _init_opts(self):
1428 def _init_opts(self):
1429 """process options (from above/config) to setup associated default revlog mode
1429 """process options (from above/config) to setup associated default revlog mode
1430
1430
1431 These values might be affected when actually reading on disk information.
1431 These values might be affected when actually reading on disk information.
1432
1432
1433 The relevant values are returned for use in _loadindex().
1433 The relevant values are returned for use in _loadindex().
1434
1434
1435 * newversionflags:
1435 * newversionflags:
1436 version header to use if we need to create a new revlog
1436 version header to use if we need to create a new revlog
1437
1437
1438 * mmapindexthreshold:
1438 * mmapindexthreshold:
1439 minimal index size for start to use mmap
1439 minimal index size for start to use mmap
1440
1440
1441 * force_nodemap:
1441 * force_nodemap:
1442 force the usage of a "development" version of the nodemap code
1442 force the usage of a "development" version of the nodemap code
1443 """
1443 """
1444 opts = self.opener.options
1444 opts = self.opener.options
1445
1445
1446 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1446 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1447 new_header = CHANGELOGV2
1447 new_header = CHANGELOGV2
1448 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1448 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1449 self.feature_config.compute_rank = compute_rank
1449 self.feature_config.compute_rank = compute_rank
1450 elif b'revlogv2' in opts:
1450 elif b'revlogv2' in opts:
1451 new_header = REVLOGV2
1451 new_header = REVLOGV2
1452 elif b'revlogv1' in opts:
1452 elif b'revlogv1' in opts:
1453 new_header = REVLOGV1
1453 new_header = REVLOGV1
1454 if self._may_inline:
1454 if self._may_inline:
1455 new_header |= FLAG_INLINE_DATA
1455 new_header |= FLAG_INLINE_DATA
1456 if b'generaldelta' in opts:
1456 if b'generaldelta' in opts:
1457 new_header |= FLAG_GENERALDELTA
1457 new_header |= FLAG_GENERALDELTA
1458 elif b'revlogv0' in self.opener.options:
1458 elif b'revlogv0' in self.opener.options:
1459 new_header = REVLOGV0
1459 new_header = REVLOGV0
1460 else:
1460 else:
1461 new_header = REVLOG_DEFAULT_VERSION
1461 new_header = REVLOG_DEFAULT_VERSION
1462
1462
1463 mmapindexthreshold = None
1463 mmapindexthreshold = None
1464 if self.data_config.mmap_large_index:
1464 if self.data_config.mmap_large_index:
1465 mmapindexthreshold = self.data_config.mmap_index_threshold
1465 mmapindexthreshold = self.data_config.mmap_index_threshold
1466 if self.feature_config.enable_ellipsis:
1466 if self.feature_config.enable_ellipsis:
1467 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1467 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1468
1468
1469 # revlog v0 doesn't have flag processors
1469 # revlog v0 doesn't have flag processors
1470 for flag, processor in opts.get(b'flagprocessors', {}).items():
1470 for flag, processor in opts.get(b'flagprocessors', {}).items():
1471 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1471 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1472
1472
1473 chunk_cache_size = self.data_config.chunk_cache_size
1473 chunk_cache_size = self.data_config.chunk_cache_size
1474 if chunk_cache_size <= 0:
1474 if chunk_cache_size <= 0:
1475 raise error.RevlogError(
1475 raise error.RevlogError(
1476 _(b'revlog chunk cache size %r is not greater than 0')
1476 _(b'revlog chunk cache size %r is not greater than 0')
1477 % chunk_cache_size
1477 % chunk_cache_size
1478 )
1478 )
1479 elif chunk_cache_size & (chunk_cache_size - 1):
1479 elif chunk_cache_size & (chunk_cache_size - 1):
1480 raise error.RevlogError(
1480 raise error.RevlogError(
1481 _(b'revlog chunk cache size %r is not a power of 2')
1481 _(b'revlog chunk cache size %r is not a power of 2')
1482 % chunk_cache_size
1482 % chunk_cache_size
1483 )
1483 )
1484 force_nodemap = opts.get(b'devel-force-nodemap', False)
1484 force_nodemap = opts.get(b'devel-force-nodemap', False)
1485 return new_header, mmapindexthreshold, force_nodemap
1485 return new_header, mmapindexthreshold, force_nodemap
1486
1486
1487 def _get_data(self, filepath, mmap_threshold, size=None):
1487 def _get_data(self, filepath, mmap_threshold, size=None):
1488 """return a file content with or without mmap
1488 """return a file content with or without mmap
1489
1489
1490 If the file is missing return the empty string"""
1490 If the file is missing return the empty string"""
1491 try:
1491 try:
1492 with self.opener(filepath) as fp:
1492 with self.opener(filepath) as fp:
1493 if mmap_threshold is not None:
1493 if mmap_threshold is not None:
1494 file_size = self.opener.fstat(fp).st_size
1494 file_size = self.opener.fstat(fp).st_size
1495 if (
1495 if (
1496 file_size >= mmap_threshold
1496 file_size >= mmap_threshold
1497 and self.opener.is_mmap_safe(filepath)
1497 and self.opener.is_mmap_safe(filepath)
1498 ):
1498 ):
1499 if size is not None:
1499 if size is not None:
1500 # avoid potentiel mmap crash
1500 # avoid potentiel mmap crash
1501 size = min(file_size, size)
1501 size = min(file_size, size)
1502 # TODO: should .close() to release resources without
1502 # TODO: should .close() to release resources without
1503 # relying on Python GC
1503 # relying on Python GC
1504 if size is None:
1504 if size is None:
1505 return util.buffer(util.mmapread(fp))
1505 return util.buffer(util.mmapread(fp))
1506 else:
1506 else:
1507 return util.buffer(util.mmapread(fp, size))
1507 return util.buffer(util.mmapread(fp, size))
1508 if size is None:
1508 if size is None:
1509 return fp.read()
1509 return fp.read()
1510 else:
1510 else:
1511 return fp.read(size)
1511 return fp.read(size)
1512 except FileNotFoundError:
1512 except FileNotFoundError:
1513 return b''
1513 return b''
1514
1514
1515 def get_streams(self, max_linkrev, force_inline=False):
1515 def get_streams(self, max_linkrev, force_inline=False):
1516 """return a list of streams that represent this revlog
1516 """return a list of streams that represent this revlog
1517
1517
1518 This is used by stream-clone to do bytes to bytes copies of a repository.
1518 This is used by stream-clone to do bytes to bytes copies of a repository.
1519
1519
1520 This streams data for all revisions that refer to a changelog revision up
1520 This streams data for all revisions that refer to a changelog revision up
1521 to `max_linkrev`.
1521 to `max_linkrev`.
1522
1522
1523 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1523 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1524
1524
1525 It returns is a list of three-tuple:
1525 It returns is a list of three-tuple:
1526
1526
1527 [
1527 [
1528 (filename, bytes_stream, stream_size),
1528 (filename, bytes_stream, stream_size),
1529
1529
1530 ]
1530 ]
1531 """
1531 """
1532 n = len(self)
1532 n = len(self)
1533 index = self.index
1533 index = self.index
1534 while n > 0:
1534 while n > 0:
1535 linkrev = index[n - 1][4]
1535 linkrev = index[n - 1][4]
1536 if linkrev < max_linkrev:
1536 if linkrev < max_linkrev:
1537 break
1537 break
1538 # note: this loop will rarely go through multiple iterations, since
1538 # note: this loop will rarely go through multiple iterations, since
1539 # it only traverses commits created during the current streaming
1539 # it only traverses commits created during the current streaming
1540 # pull operation.
1540 # pull operation.
1541 #
1541 #
1542 # If this become a problem, using a binary search should cap the
1542 # If this become a problem, using a binary search should cap the
1543 # runtime of this.
1543 # runtime of this.
1544 n = n - 1
1544 n = n - 1
1545 if n == 0:
1545 if n == 0:
1546 # no data to send
1546 # no data to send
1547 return []
1547 return []
1548 index_size = n * index.entry_size
1548 index_size = n * index.entry_size
1549 data_size = self.end(n - 1)
1549 data_size = self.end(n - 1)
1550
1550
1551 # XXX we might have been split (or stripped) since the object
1551 # XXX we might have been split (or stripped) since the object
1552 # initialization, We need to close this race too, but having a way to
1552 # initialization, We need to close this race too, but having a way to
1553 # pre-open the file we feed to the revlog and never closing them before
1553 # pre-open the file we feed to the revlog and never closing them before
1554 # we are done streaming.
1554 # we are done streaming.
1555
1555
1556 if self._inline:
1556 if self._inline:
1557
1557
1558 def get_stream():
1558 def get_stream():
1559 with self.opener(self._indexfile, mode=b"r") as fp:
1559 with self.opener(self._indexfile, mode=b"r") as fp:
1560 yield None
1560 yield None
1561 size = index_size + data_size
1561 size = index_size + data_size
1562 if size <= 65536:
1562 if size <= 65536:
1563 yield fp.read(size)
1563 yield fp.read(size)
1564 else:
1564 else:
1565 yield from util.filechunkiter(fp, limit=size)
1565 yield from util.filechunkiter(fp, limit=size)
1566
1566
1567 inline_stream = get_stream()
1567 inline_stream = get_stream()
1568 next(inline_stream)
1568 next(inline_stream)
1569 return [
1569 return [
1570 (self._indexfile, inline_stream, index_size + data_size),
1570 (self._indexfile, inline_stream, index_size + data_size),
1571 ]
1571 ]
1572 elif force_inline:
1572 elif force_inline:
1573
1573
1574 def get_stream():
1574 def get_stream():
1575 with self.reading():
1575 with self.reading():
1576 yield None
1576 yield None
1577
1577
1578 for rev in range(n):
1578 for rev in range(n):
1579 idx = self.index.entry_binary(rev)
1579 idx = self.index.entry_binary(rev)
1580 if rev == 0 and self._docket is None:
1580 if rev == 0 and self._docket is None:
1581 # re-inject the inline flag
1581 # re-inject the inline flag
1582 header = self._format_flags
1582 header = self._format_flags
1583 header |= self._format_version
1583 header |= self._format_version
1584 header |= FLAG_INLINE_DATA
1584 header |= FLAG_INLINE_DATA
1585 header = self.index.pack_header(header)
1585 header = self.index.pack_header(header)
1586 idx = header + idx
1586 idx = header + idx
1587 yield idx
1587 yield idx
1588 yield self._inner.get_segment_for_revs(rev, rev)[1]
1588 yield self._inner.get_segment_for_revs(rev, rev)[1]
1589
1589
1590 inline_stream = get_stream()
1590 inline_stream = get_stream()
1591 next(inline_stream)
1591 next(inline_stream)
1592 return [
1592 return [
1593 (self._indexfile, inline_stream, index_size + data_size),
1593 (self._indexfile, inline_stream, index_size + data_size),
1594 ]
1594 ]
1595 else:
1595 else:
1596
1596
1597 def get_index_stream():
1597 def get_index_stream():
1598 with self.opener(self._indexfile, mode=b"r") as fp:
1598 with self.opener(self._indexfile, mode=b"r") as fp:
1599 yield None
1599 yield None
1600 if index_size <= 65536:
1600 if index_size <= 65536:
1601 yield fp.read(index_size)
1601 yield fp.read(index_size)
1602 else:
1602 else:
1603 yield from util.filechunkiter(fp, limit=index_size)
1603 yield from util.filechunkiter(fp, limit=index_size)
1604
1604
1605 def get_data_stream():
1605 def get_data_stream():
1606 with self._datafp() as fp:
1606 with self._datafp() as fp:
1607 yield None
1607 yield None
1608 if data_size <= 65536:
1608 if data_size <= 65536:
1609 yield fp.read(data_size)
1609 yield fp.read(data_size)
1610 else:
1610 else:
1611 yield from util.filechunkiter(fp, limit=data_size)
1611 yield from util.filechunkiter(fp, limit=data_size)
1612
1612
1613 index_stream = get_index_stream()
1613 index_stream = get_index_stream()
1614 next(index_stream)
1614 next(index_stream)
1615 data_stream = get_data_stream()
1615 data_stream = get_data_stream()
1616 next(data_stream)
1616 next(data_stream)
1617 return [
1617 return [
1618 (self._datafile, data_stream, data_size),
1618 (self._datafile, data_stream, data_size),
1619 (self._indexfile, index_stream, index_size),
1619 (self._indexfile, index_stream, index_size),
1620 ]
1620 ]
1621
1621
1622 def _loadindex(self, docket=None):
1622 def _loadindex(self, docket=None):
1623 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1623 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1624
1624
1625 if self.postfix is not None:
1625 if self.postfix is not None:
1626 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1626 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1627 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1627 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1628 entry_point = b'%s.i.a' % self.radix
1628 entry_point = b'%s.i.a' % self.radix
1629 elif self._try_split and self.opener.exists(self._split_index_file):
1629 elif self._try_split and self.opener.exists(self._split_index_file):
1630 entry_point = self._split_index_file
1630 entry_point = self._split_index_file
1631 else:
1631 else:
1632 entry_point = b'%s.i' % self.radix
1632 entry_point = b'%s.i' % self.radix
1633
1633
1634 if docket is not None:
1634 if docket is not None:
1635 self._docket = docket
1635 self._docket = docket
1636 self._docket_file = entry_point
1636 self._docket_file = entry_point
1637 else:
1637 else:
1638 self._initempty = True
1638 self._initempty = True
1639 entry_data = self._get_data(entry_point, mmapindexthreshold)
1639 entry_data = self._get_data(entry_point, mmapindexthreshold)
1640 if len(entry_data) > 0:
1640 if len(entry_data) > 0:
1641 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1641 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1642 self._initempty = False
1642 self._initempty = False
1643 else:
1643 else:
1644 header = new_header
1644 header = new_header
1645
1645
1646 self._format_flags = header & ~0xFFFF
1646 self._format_flags = header & ~0xFFFF
1647 self._format_version = header & 0xFFFF
1647 self._format_version = header & 0xFFFF
1648
1648
1649 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1649 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1650 if supported_flags is None:
1650 if supported_flags is None:
1651 msg = _(b'unknown version (%d) in revlog %s')
1651 msg = _(b'unknown version (%d) in revlog %s')
1652 msg %= (self._format_version, self.display_id)
1652 msg %= (self._format_version, self.display_id)
1653 raise error.RevlogError(msg)
1653 raise error.RevlogError(msg)
1654 elif self._format_flags & ~supported_flags:
1654 elif self._format_flags & ~supported_flags:
1655 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1655 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1656 display_flag = self._format_flags >> 16
1656 display_flag = self._format_flags >> 16
1657 msg %= (display_flag, self._format_version, self.display_id)
1657 msg %= (display_flag, self._format_version, self.display_id)
1658 raise error.RevlogError(msg)
1658 raise error.RevlogError(msg)
1659
1659
1660 features = FEATURES_BY_VERSION[self._format_version]
1660 features = FEATURES_BY_VERSION[self._format_version]
1661 self._inline = features[b'inline'](self._format_flags)
1661 self._inline = features[b'inline'](self._format_flags)
1662 self.delta_config.general_delta = features[b'generaldelta'](
1662 self.delta_config.general_delta = features[b'generaldelta'](
1663 self._format_flags
1663 self._format_flags
1664 )
1664 )
1665 self.feature_config.has_side_data = features[b'sidedata']
1665 self.feature_config.has_side_data = features[b'sidedata']
1666
1666
1667 if not features[b'docket']:
1667 if not features[b'docket']:
1668 self._indexfile = entry_point
1668 self._indexfile = entry_point
1669 index_data = entry_data
1669 index_data = entry_data
1670 else:
1670 else:
1671 self._docket_file = entry_point
1671 self._docket_file = entry_point
1672 if self._initempty:
1672 if self._initempty:
1673 self._docket = docketutil.default_docket(self, header)
1673 self._docket = docketutil.default_docket(self, header)
1674 else:
1674 else:
1675 self._docket = docketutil.parse_docket(
1675 self._docket = docketutil.parse_docket(
1676 self, entry_data, use_pending=self._trypending
1676 self, entry_data, use_pending=self._trypending
1677 )
1677 )
1678
1678
1679 if self._docket is not None:
1679 if self._docket is not None:
1680 self._indexfile = self._docket.index_filepath()
1680 self._indexfile = self._docket.index_filepath()
1681 index_data = b''
1681 index_data = b''
1682 index_size = self._docket.index_end
1682 index_size = self._docket.index_end
1683 if index_size > 0:
1683 if index_size > 0:
1684 index_data = self._get_data(
1684 index_data = self._get_data(
1685 self._indexfile, mmapindexthreshold, size=index_size
1685 self._indexfile, mmapindexthreshold, size=index_size
1686 )
1686 )
1687 if len(index_data) < index_size:
1687 if len(index_data) < index_size:
1688 msg = _(b'too few index data for %s: got %d, expected %d')
1688 msg = _(b'too few index data for %s: got %d, expected %d')
1689 msg %= (self.display_id, len(index_data), index_size)
1689 msg %= (self.display_id, len(index_data), index_size)
1690 raise error.RevlogError(msg)
1690 raise error.RevlogError(msg)
1691
1691
1692 self._inline = False
1692 self._inline = False
1693 # generaldelta implied by version 2 revlogs.
1693 # generaldelta implied by version 2 revlogs.
1694 self.delta_config.general_delta = True
1694 self.delta_config.general_delta = True
1695 # the logic for persistent nodemap will be dealt with within the
1695 # the logic for persistent nodemap will be dealt with within the
1696 # main docket, so disable it for now.
1696 # main docket, so disable it for now.
1697 self._nodemap_file = None
1697 self._nodemap_file = None
1698
1698
1699 if self._docket is not None:
1699 if self._docket is not None:
1700 self._datafile = self._docket.data_filepath()
1700 self._datafile = self._docket.data_filepath()
1701 self._sidedatafile = self._docket.sidedata_filepath()
1701 self._sidedatafile = self._docket.sidedata_filepath()
1702 elif self.postfix is None:
1702 elif self.postfix is None:
1703 self._datafile = b'%s.d' % self.radix
1703 self._datafile = b'%s.d' % self.radix
1704 else:
1704 else:
1705 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1705 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1706
1706
1707 self.nodeconstants = sha1nodeconstants
1707 self.nodeconstants = sha1nodeconstants
1708 self.nullid = self.nodeconstants.nullid
1708 self.nullid = self.nodeconstants.nullid
1709
1709
1710 # sparse-revlog can't be on without general-delta (issue6056)
1710 # sparse-revlog can't be on without general-delta (issue6056)
1711 if not self.delta_config.general_delta:
1711 if not self.delta_config.general_delta:
1712 self.delta_config.sparse_revlog = False
1712 self.delta_config.sparse_revlog = False
1713
1713
1714 self._storedeltachains = True
1714 self._storedeltachains = True
1715
1715
1716 devel_nodemap = (
1716 devel_nodemap = (
1717 self._nodemap_file
1717 self._nodemap_file
1718 and force_nodemap
1718 and force_nodemap
1719 and parse_index_v1_nodemap is not None
1719 and parse_index_v1_nodemap is not None
1720 )
1720 )
1721
1721
1722 use_rust_index = False
1722 use_rust_index = False
1723 if rustrevlog is not None and self._nodemap_file is not None:
1723 if rustrevlog is not None and self._nodemap_file is not None:
1724 # we would like to use the rust_index in all case, especially
1724 # we would like to use the rust_index in all case, especially
1725 # because it is necessary for AncestorsIterator and LazyAncestors
1725 # because it is necessary for AncestorsIterator and LazyAncestors
1726 # since the 6.7 cycle.
1726 # since the 6.7 cycle.
1727 #
1727 #
1728 # However, the performance impact of inconditionnaly building the
1728 # However, the performance impact of inconditionnaly building the
1729 # nodemap is currently a problem for non-persistent nodemap
1729 # nodemap is currently a problem for non-persistent nodemap
1730 # repository.
1730 # repository.
1731 use_rust_index = True
1731 use_rust_index = True
1732
1732
1733 self._parse_index = parse_index_v1
1733 self._parse_index = parse_index_v1
1734 if self._format_version == REVLOGV0:
1734 if self._format_version == REVLOGV0:
1735 self._parse_index = revlogv0.parse_index_v0
1735 self._parse_index = revlogv0.parse_index_v0
1736 elif self._format_version == REVLOGV2:
1736 elif self._format_version == REVLOGV2:
1737 self._parse_index = parse_index_v2
1737 self._parse_index = parse_index_v2
1738 elif self._format_version == CHANGELOGV2:
1738 elif self._format_version == CHANGELOGV2:
1739 self._parse_index = parse_index_cl_v2
1739 self._parse_index = parse_index_cl_v2
1740 elif devel_nodemap:
1740 elif devel_nodemap:
1741 self._parse_index = parse_index_v1_nodemap
1741 self._parse_index = parse_index_v1_nodemap
1742 elif use_rust_index:
1742 elif use_rust_index:
1743 self._parse_index = functools.partial(
1743 self._parse_index = functools.partial(
1744 parse_index_v1_rust, default_header=new_header
1744 parse_index_v1_rust, default_header=new_header
1745 )
1745 )
1746 try:
1746 try:
1747 d = self._parse_index(index_data, self._inline)
1747 d = self._parse_index(index_data, self._inline)
1748 index, chunkcache = d
1748 index, chunkcache = d
1749 use_nodemap = (
1749 use_nodemap = (
1750 not self._inline
1750 not self._inline
1751 and self._nodemap_file is not None
1751 and self._nodemap_file is not None
1752 and hasattr(index, 'update_nodemap_data')
1752 and hasattr(index, 'update_nodemap_data')
1753 )
1753 )
1754 if use_nodemap:
1754 if use_nodemap:
1755 nodemap_data = nodemaputil.persisted_data(self)
1755 nodemap_data = nodemaputil.persisted_data(self)
1756 if nodemap_data is not None:
1756 if nodemap_data is not None:
1757 docket = nodemap_data[0]
1757 docket = nodemap_data[0]
1758 if (
1758 if (
1759 len(d[0]) > docket.tip_rev
1759 len(d[0]) > docket.tip_rev
1760 and d[0][docket.tip_rev][7] == docket.tip_node
1760 and d[0][docket.tip_rev][7] == docket.tip_node
1761 ):
1761 ):
1762 # no changelog tampering
1762 # no changelog tampering
1763 self._nodemap_docket = docket
1763 self._nodemap_docket = docket
1764 index.update_nodemap_data(*nodemap_data)
1764 index.update_nodemap_data(*nodemap_data)
1765 except (ValueError, IndexError):
1765 except (ValueError, IndexError):
1766 raise error.RevlogError(
1766 raise error.RevlogError(
1767 _(b"index %s is corrupted") % self.display_id
1767 _(b"index %s is corrupted") % self.display_id
1768 )
1768 )
1769 self.index = index
1769 self.index = index
1770 # revnum -> (chain-length, sum-delta-length)
1770 # revnum -> (chain-length, sum-delta-length)
1771 self._chaininfocache = util.lrucachedict(500)
1771 self._chaininfocache = util.lrucachedict(500)
1772
1772
1773 return chunkcache
1773 return chunkcache
1774
1774
1775 def _load_inner(self, chunk_cache):
1775 def _load_inner(self, chunk_cache):
1776 if self._docket is None:
1776 if self._docket is None:
1777 default_compression_header = None
1777 default_compression_header = None
1778 else:
1778 else:
1779 default_compression_header = self._docket.default_compression_header
1779 default_compression_header = self._docket.default_compression_header
1780
1780
1781 self._inner = _InnerRevlog(
1781 self._inner = _InnerRevlog(
1782 opener=self.opener,
1782 opener=self.opener,
1783 index=self.index,
1783 index=self.index,
1784 index_file=self._indexfile,
1784 index_file=self._indexfile,
1785 data_file=self._datafile,
1785 data_file=self._datafile,
1786 sidedata_file=self._sidedatafile,
1786 sidedata_file=self._sidedatafile,
1787 inline=self._inline,
1787 inline=self._inline,
1788 data_config=self.data_config,
1788 data_config=self.data_config,
1789 delta_config=self.delta_config,
1789 delta_config=self.delta_config,
1790 feature_config=self.feature_config,
1790 feature_config=self.feature_config,
1791 chunk_cache=chunk_cache,
1791 chunk_cache=chunk_cache,
1792 default_compression_header=default_compression_header,
1792 default_compression_header=default_compression_header,
1793 )
1793 )
1794
1794
1795 def get_revlog(self):
1795 def get_revlog(self):
1796 """simple function to mirror API of other not-really-revlog API"""
1796 """simple function to mirror API of other not-really-revlog API"""
1797 return self
1797 return self
1798
1798
1799 @util.propertycache
1799 @util.propertycache
1800 def revlog_kind(self):
1800 def revlog_kind(self):
1801 return self.target[0]
1801 return self.target[0]
1802
1802
1803 @util.propertycache
1803 @util.propertycache
1804 def display_id(self):
1804 def display_id(self):
1805 """The public facing "ID" of the revlog that we use in message"""
1805 """The public facing "ID" of the revlog that we use in message"""
1806 if self.revlog_kind == KIND_FILELOG:
1806 if self.revlog_kind == KIND_FILELOG:
1807 # Reference the file without the "data/" prefix, so it is familiar
1807 # Reference the file without the "data/" prefix, so it is familiar
1808 # to the user.
1808 # to the user.
1809 return self.target[1]
1809 return self.target[1]
1810 else:
1810 else:
1811 return self.radix
1811 return self.radix
1812
1812
1813 def _datafp(self, mode=b'r'):
1813 def _datafp(self, mode=b'r'):
1814 """file object for the revlog's data file"""
1814 """file object for the revlog's data file"""
1815 return self.opener(self._datafile, mode=mode)
1815 return self.opener(self._datafile, mode=mode)
1816
1816
1817 def tiprev(self):
1817 def tiprev(self):
1818 return len(self.index) - 1
1818 return len(self.index) - 1
1819
1819
1820 def tip(self):
1820 def tip(self):
1821 return self.node(self.tiprev())
1821 return self.node(self.tiprev())
1822
1822
1823 def __contains__(self, rev):
1823 def __contains__(self, rev):
1824 return 0 <= rev < len(self)
1824 return 0 <= rev < len(self)
1825
1825
1826 def __len__(self):
1826 def __len__(self):
1827 return len(self.index)
1827 return len(self.index)
1828
1828
1829 def __iter__(self) -> Iterator[int]:
1829 def __iter__(self) -> Iterator[int]:
1830 return iter(range(len(self)))
1830 return iter(range(len(self)))
1831
1831
1832 def revs(self, start=0, stop=None):
1832 def revs(self, start=0, stop=None):
1833 """iterate over all rev in this revlog (from start to stop)"""
1833 """iterate over all rev in this revlog (from start to stop)"""
1834 return storageutil.iterrevs(len(self), start=start, stop=stop)
1834 return storageutil.iterrevs(len(self), start=start, stop=stop)
1835
1835
1836 def hasnode(self, node):
1836 def hasnode(self, node):
1837 try:
1837 try:
1838 self.rev(node)
1838 self.rev(node)
1839 return True
1839 return True
1840 except KeyError:
1840 except KeyError:
1841 return False
1841 return False
1842
1842
1843 def _candelta(self, baserev, rev):
1843 def _candelta(self, baserev, rev):
1844 """whether two revisions (baserev, rev) can be delta-ed or not"""
1844 """whether two revisions (baserev, rev) can be delta-ed or not"""
1845 # Disable delta if either rev requires a content-changing flag
1845 # Disable delta if either rev requires a content-changing flag
1846 # processor (ex. LFS). This is because such flag processor can alter
1846 # processor (ex. LFS). This is because such flag processor can alter
1847 # the rawtext content that the delta will be based on, and two clients
1847 # the rawtext content that the delta will be based on, and two clients
1848 # could have a same revlog node with different flags (i.e. different
1848 # could have a same revlog node with different flags (i.e. different
1849 # rawtext contents) and the delta could be incompatible.
1849 # rawtext contents) and the delta could be incompatible.
1850 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1850 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1851 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1851 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1852 ):
1852 ):
1853 return False
1853 return False
1854 return True
1854 return True
1855
1855
1856 def update_caches(self, transaction):
1856 def update_caches(self, transaction):
1857 """update on disk cache
1857 """update on disk cache
1858
1858
1859 If a transaction is passed, the update may be delayed to transaction
1859 If a transaction is passed, the update may be delayed to transaction
1860 commit."""
1860 commit."""
1861 if self._nodemap_file is not None:
1861 if self._nodemap_file is not None:
1862 if transaction is None:
1862 if transaction is None:
1863 nodemaputil.update_persistent_nodemap(self)
1863 nodemaputil.update_persistent_nodemap(self)
1864 else:
1864 else:
1865 nodemaputil.setup_persistent_nodemap(transaction, self)
1865 nodemaputil.setup_persistent_nodemap(transaction, self)
1866
1866
1867 def clearcaches(self):
1867 def clearcaches(self):
1868 """Clear in-memory caches"""
1868 """Clear in-memory caches"""
1869 self._chainbasecache.clear()
1869 self._chainbasecache.clear()
1870 self._inner.clear_cache()
1870 self._inner.clear_cache()
1871 self._pcache = {}
1871 self._pcache = {}
1872 self._nodemap_docket = None
1872 self._nodemap_docket = None
1873 self.index.clearcaches()
1873 self.index.clearcaches()
1874 # The python code is the one responsible for validating the docket, we
1874 # The python code is the one responsible for validating the docket, we
1875 # end up having to refresh it here.
1875 # end up having to refresh it here.
1876 use_nodemap = (
1876 use_nodemap = (
1877 not self._inline
1877 not self._inline
1878 and self._nodemap_file is not None
1878 and self._nodemap_file is not None
1879 and hasattr(self.index, 'update_nodemap_data')
1879 and hasattr(self.index, 'update_nodemap_data')
1880 )
1880 )
1881 if use_nodemap:
1881 if use_nodemap:
1882 nodemap_data = nodemaputil.persisted_data(self)
1882 nodemap_data = nodemaputil.persisted_data(self)
1883 if nodemap_data is not None:
1883 if nodemap_data is not None:
1884 self._nodemap_docket = nodemap_data[0]
1884 self._nodemap_docket = nodemap_data[0]
1885 self.index.update_nodemap_data(*nodemap_data)
1885 self.index.update_nodemap_data(*nodemap_data)
1886
1886
1887 def rev(self, node):
1887 def rev(self, node):
1888 """return the revision number associated with a <nodeid>"""
1888 """return the revision number associated with a <nodeid>"""
1889 try:
1889 try:
1890 return self.index.rev(node)
1890 return self.index.rev(node)
1891 except TypeError:
1891 except TypeError:
1892 raise
1892 raise
1893 except error.RevlogError:
1893 except error.RevlogError:
1894 # parsers.c radix tree lookup failed
1894 # parsers.c radix tree lookup failed
1895 if (
1895 if (
1896 node == self.nodeconstants.wdirid
1896 node == self.nodeconstants.wdirid
1897 or node in self.nodeconstants.wdirfilenodeids
1897 or node in self.nodeconstants.wdirfilenodeids
1898 ):
1898 ):
1899 raise error.WdirUnsupported
1899 raise error.WdirUnsupported
1900 raise error.LookupError(node, self.display_id, _(b'no node'))
1900 raise error.LookupError(node, self.display_id, _(b'no node'))
1901
1901
1902 # Accessors for index entries.
1902 # Accessors for index entries.
1903
1903
1904 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1904 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1905 # are flags.
1905 # are flags.
1906 def start(self, rev):
1906 def start(self, rev):
1907 return int(self.index[rev][0] >> 16)
1907 return int(self.index[rev][0] >> 16)
1908
1908
1909 def sidedata_cut_off(self, rev):
1909 def sidedata_cut_off(self, rev):
1910 sd_cut_off = self.index[rev][8]
1910 sd_cut_off = self.index[rev][8]
1911 if sd_cut_off != 0:
1911 if sd_cut_off != 0:
1912 return sd_cut_off
1912 return sd_cut_off
1913 # This is some annoying dance, because entries without sidedata
1913 # This is some annoying dance, because entries without sidedata
1914 # currently use 0 as their ofsset. (instead of previous-offset +
1914 # currently use 0 as their ofsset. (instead of previous-offset +
1915 # previous-size)
1915 # previous-size)
1916 #
1916 #
1917 # We should reconsider this sidedata → 0 sidata_offset policy.
1917 # We should reconsider this sidedata → 0 sidata_offset policy.
1918 # In the meantime, we need this.
1918 # In the meantime, we need this.
1919 while 0 <= rev:
1919 while 0 <= rev:
1920 e = self.index[rev]
1920 e = self.index[rev]
1921 if e[9] != 0:
1921 if e[9] != 0:
1922 return e[8] + e[9]
1922 return e[8] + e[9]
1923 rev -= 1
1923 rev -= 1
1924 return 0
1924 return 0
1925
1925
1926 def flags(self, rev):
1926 def flags(self, rev):
1927 return self.index[rev][0] & 0xFFFF
1927 return self.index[rev][0] & 0xFFFF
1928
1928
1929 def length(self, rev):
1929 def length(self, rev):
1930 return self.index[rev][1]
1930 return self.index[rev][1]
1931
1931
1932 def sidedata_length(self, rev):
1932 def sidedata_length(self, rev):
1933 if not self.feature_config.has_side_data:
1933 if not self.feature_config.has_side_data:
1934 return 0
1934 return 0
1935 return self.index[rev][9]
1935 return self.index[rev][9]
1936
1936
1937 def rawsize(self, rev):
1937 def rawsize(self, rev):
1938 """return the length of the uncompressed text for a given revision"""
1938 """return the length of the uncompressed text for a given revision"""
1939 l = self.index[rev][2]
1939 l = self.index[rev][2]
1940 if l >= 0:
1940 if l >= 0:
1941 return l
1941 return l
1942
1942
1943 t = self.rawdata(rev)
1943 t = self.rawdata(rev)
1944 return len(t)
1944 return len(t)
1945
1945
1946 def size(self, rev):
1946 def size(self, rev):
1947 """length of non-raw text (processed by a "read" flag processor)"""
1947 """length of non-raw text (processed by a "read" flag processor)"""
1948 # fast path: if no "read" flag processor could change the content,
1948 # fast path: if no "read" flag processor could change the content,
1949 # size is rawsize. note: ELLIPSIS is known to not change the content.
1949 # size is rawsize. note: ELLIPSIS is known to not change the content.
1950 flags = self.flags(rev)
1950 flags = self.flags(rev)
1951 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1951 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1952 return self.rawsize(rev)
1952 return self.rawsize(rev)
1953
1953
1954 return len(self.revision(rev))
1954 return len(self.revision(rev))
1955
1955
1956 def fast_rank(self, rev):
1956 def fast_rank(self, rev):
1957 """Return the rank of a revision if already known, or None otherwise.
1957 """Return the rank of a revision if already known, or None otherwise.
1958
1958
1959 The rank of a revision is the size of the sub-graph it defines as a
1959 The rank of a revision is the size of the sub-graph it defines as a
1960 head. Equivalently, the rank of a revision `r` is the size of the set
1960 head. Equivalently, the rank of a revision `r` is the size of the set
1961 `ancestors(r)`, `r` included.
1961 `ancestors(r)`, `r` included.
1962
1962
1963 This method returns the rank retrieved from the revlog in constant
1963 This method returns the rank retrieved from the revlog in constant
1964 time. It makes no attempt at computing unknown values for versions of
1964 time. It makes no attempt at computing unknown values for versions of
1965 the revlog which do not persist the rank.
1965 the revlog which do not persist the rank.
1966 """
1966 """
1967 rank = self.index[rev][ENTRY_RANK]
1967 rank = self.index[rev][ENTRY_RANK]
1968 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1968 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1969 return None
1969 return None
1970 if rev == nullrev:
1970 if rev == nullrev:
1971 return 0 # convention
1971 return 0 # convention
1972 return rank
1972 return rank
1973
1973
1974 def chainbase(self, rev):
1974 def chainbase(self, rev):
1975 base = self._chainbasecache.get(rev)
1975 base = self._chainbasecache.get(rev)
1976 if base is not None:
1976 if base is not None:
1977 return base
1977 return base
1978
1978
1979 index = self.index
1979 index = self.index
1980 iterrev = rev
1980 iterrev = rev
1981 base = index[iterrev][3]
1981 base = index[iterrev][3]
1982 while base != iterrev:
1982 while base != iterrev:
1983 iterrev = base
1983 iterrev = base
1984 base = index[iterrev][3]
1984 base = index[iterrev][3]
1985
1985
1986 self._chainbasecache[rev] = base
1986 self._chainbasecache[rev] = base
1987 return base
1987 return base
1988
1988
1989 def linkrev(self, rev):
1989 def linkrev(self, rev):
1990 return self.index[rev][4]
1990 return self.index[rev][4]
1991
1991
1992 def parentrevs(self, rev):
1992 def parentrevs(self, rev):
1993 try:
1993 try:
1994 entry = self.index[rev]
1994 entry = self.index[rev]
1995 except IndexError:
1995 except IndexError:
1996 if rev == wdirrev:
1996 if rev == wdirrev:
1997 raise error.WdirUnsupported
1997 raise error.WdirUnsupported
1998 raise
1998 raise
1999
1999
2000 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
2000 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
2001 return entry[6], entry[5]
2001 return entry[6], entry[5]
2002 else:
2002 else:
2003 return entry[5], entry[6]
2003 return entry[5], entry[6]
2004
2004
2005 # fast parentrevs(rev) where rev isn't filtered
2005 # fast parentrevs(rev) where rev isn't filtered
2006 _uncheckedparentrevs = parentrevs
2006 _uncheckedparentrevs = parentrevs
2007
2007
2008 def node(self, rev):
2008 def node(self, rev):
2009 try:
2009 try:
2010 return self.index[rev][7]
2010 return self.index[rev][7]
2011 except IndexError:
2011 except IndexError:
2012 if rev == wdirrev:
2012 if rev == wdirrev:
2013 raise error.WdirUnsupported
2013 raise error.WdirUnsupported
2014 raise
2014 raise
2015
2015
2016 # Derived from index values.
2016 # Derived from index values.
2017
2017
2018 def end(self, rev):
2018 def end(self, rev):
2019 return self.start(rev) + self.length(rev)
2019 return self.start(rev) + self.length(rev)
2020
2020
2021 def parents(self, node):
2021 def parents(self, node):
2022 i = self.index
2022 i = self.index
2023 d = i[self.rev(node)]
2023 d = i[self.rev(node)]
2024 # inline node() to avoid function call overhead
2024 # inline node() to avoid function call overhead
2025 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
2025 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
2026 return i[d[6]][7], i[d[5]][7]
2026 return i[d[6]][7], i[d[5]][7]
2027 else:
2027 else:
2028 return i[d[5]][7], i[d[6]][7]
2028 return i[d[5]][7], i[d[6]][7]
2029
2029
2030 def chainlen(self, rev):
2030 def chainlen(self, rev):
2031 return self._chaininfo(rev)[0]
2031 return self._chaininfo(rev)[0]
2032
2032
2033 def _chaininfo(self, rev):
2033 def _chaininfo(self, rev):
2034 chaininfocache = self._chaininfocache
2034 chaininfocache = self._chaininfocache
2035 if rev in chaininfocache:
2035 if rev in chaininfocache:
2036 return chaininfocache[rev]
2036 return chaininfocache[rev]
2037 index = self.index
2037 index = self.index
2038 generaldelta = self.delta_config.general_delta
2038 generaldelta = self.delta_config.general_delta
2039 iterrev = rev
2039 iterrev = rev
2040 e = index[iterrev]
2040 e = index[iterrev]
2041 clen = 0
2041 clen = 0
2042 compresseddeltalen = 0
2042 compresseddeltalen = 0
2043 while iterrev != e[3]:
2043 while iterrev != e[3]:
2044 clen += 1
2044 clen += 1
2045 compresseddeltalen += e[1]
2045 compresseddeltalen += e[1]
2046 if generaldelta:
2046 if generaldelta:
2047 iterrev = e[3]
2047 iterrev = e[3]
2048 else:
2048 else:
2049 iterrev -= 1
2049 iterrev -= 1
2050 if iterrev in chaininfocache:
2050 if iterrev in chaininfocache:
2051 t = chaininfocache[iterrev]
2051 t = chaininfocache[iterrev]
2052 clen += t[0]
2052 clen += t[0]
2053 compresseddeltalen += t[1]
2053 compresseddeltalen += t[1]
2054 break
2054 break
2055 e = index[iterrev]
2055 e = index[iterrev]
2056 else:
2056 else:
2057 # Add text length of base since decompressing that also takes
2057 # Add text length of base since decompressing that also takes
2058 # work. For cache hits the length is already included.
2058 # work. For cache hits the length is already included.
2059 compresseddeltalen += e[1]
2059 compresseddeltalen += e[1]
2060 r = (clen, compresseddeltalen)
2060 r = (clen, compresseddeltalen)
2061 chaininfocache[rev] = r
2061 chaininfocache[rev] = r
2062 return r
2062 return r
2063
2063
2064 def _deltachain(self, rev, stoprev=None):
2064 def _deltachain(self, rev, stoprev=None):
2065 return self._inner._deltachain(rev, stoprev=stoprev)
2065 return self._inner._deltachain(rev, stoprev=stoprev)
2066
2066
2067 def ancestors(self, revs, stoprev=0, inclusive=False):
2067 def ancestors(self, revs, stoprev=0, inclusive=False):
2068 """Generate the ancestors of 'revs' in reverse revision order.
2068 """Generate the ancestors of 'revs' in reverse revision order.
2069 Does not generate revs lower than stoprev.
2069 Does not generate revs lower than stoprev.
2070
2070
2071 See the documentation for ancestor.lazyancestors for more details."""
2071 See the documentation for ancestor.lazyancestors for more details."""
2072
2072
2073 # first, make sure start revisions aren't filtered
2073 # first, make sure start revisions aren't filtered
2074 revs = list(revs)
2074 revs = list(revs)
2075 checkrev = self.node
2075 checkrev = self.node
2076 for r in revs:
2076 for r in revs:
2077 checkrev(r)
2077 checkrev(r)
2078 # and we're sure ancestors aren't filtered as well
2078 # and we're sure ancestors aren't filtered as well
2079
2079
2080 if rustancestor is not None and self.index.rust_ext_compat:
2080 if rustancestor is not None and self.index.rust_ext_compat:
2081 lazyancestors = rustancestor.LazyAncestors
2081 lazyancestors = rustancestor.LazyAncestors
2082 arg = self.index
2082 arg = self.index
2083 else:
2083 else:
2084 lazyancestors = ancestor.lazyancestors
2084 lazyancestors = ancestor.lazyancestors
2085 arg = self._uncheckedparentrevs
2085 arg = self._uncheckedparentrevs
2086 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
2086 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
2087
2087
2088 def descendants(self, revs):
2088 def descendants(self, revs):
2089 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
2089 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
2090
2090
2091 def findcommonmissing(self, common=None, heads=None):
2091 def findcommonmissing(self, common=None, heads=None):
2092 """Return a tuple of the ancestors of common and the ancestors of heads
2092 """Return a tuple of the ancestors of common and the ancestors of heads
2093 that are not ancestors of common. In revset terminology, we return the
2093 that are not ancestors of common. In revset terminology, we return the
2094 tuple:
2094 tuple:
2095
2095
2096 ::common, (::heads) - (::common)
2096 ::common, (::heads) - (::common)
2097
2097
2098 The list is sorted by revision number, meaning it is
2098 The list is sorted by revision number, meaning it is
2099 topologically sorted.
2099 topologically sorted.
2100
2100
2101 'heads' and 'common' are both lists of node IDs. If heads is
2101 'heads' and 'common' are both lists of node IDs. If heads is
2102 not supplied, uses all of the revlog's heads. If common is not
2102 not supplied, uses all of the revlog's heads. If common is not
2103 supplied, uses nullid."""
2103 supplied, uses nullid."""
2104 if common is None:
2104 if common is None:
2105 common = [self.nullid]
2105 common = [self.nullid]
2106 if heads is None:
2106 if heads is None:
2107 heads = self.heads()
2107 heads = self.heads()
2108
2108
2109 common = [self.rev(n) for n in common]
2109 common = [self.rev(n) for n in common]
2110 heads = [self.rev(n) for n in heads]
2110 heads = [self.rev(n) for n in heads]
2111
2111
2112 # we want the ancestors, but inclusive
2112 # we want the ancestors, but inclusive
2113 class lazyset:
2113 class lazyset:
2114 def __init__(self, lazyvalues):
2114 def __init__(self, lazyvalues):
2115 self.addedvalues = set()
2115 self.addedvalues = set()
2116 self.lazyvalues = lazyvalues
2116 self.lazyvalues = lazyvalues
2117
2117
2118 def __contains__(self, value):
2118 def __contains__(self, value):
2119 return value in self.addedvalues or value in self.lazyvalues
2119 return value in self.addedvalues or value in self.lazyvalues
2120
2120
2121 def __iter__(self):
2121 def __iter__(self):
2122 added = self.addedvalues
2122 added = self.addedvalues
2123 for r in added:
2123 for r in added:
2124 yield r
2124 yield r
2125 for r in self.lazyvalues:
2125 for r in self.lazyvalues:
2126 if not r in added:
2126 if not r in added:
2127 yield r
2127 yield r
2128
2128
2129 def add(self, value):
2129 def add(self, value):
2130 self.addedvalues.add(value)
2130 self.addedvalues.add(value)
2131
2131
2132 def update(self, values):
2132 def update(self, values):
2133 self.addedvalues.update(values)
2133 self.addedvalues.update(values)
2134
2134
2135 has = lazyset(self.ancestors(common))
2135 has = lazyset(self.ancestors(common))
2136 has.add(nullrev)
2136 has.add(nullrev)
2137 has.update(common)
2137 has.update(common)
2138
2138
2139 # take all ancestors from heads that aren't in has
2139 # take all ancestors from heads that aren't in has
2140 missing = set()
2140 missing = set()
2141 visit = collections.deque(r for r in heads if r not in has)
2141 visit = collections.deque(r for r in heads if r not in has)
2142 while visit:
2142 while visit:
2143 r = visit.popleft()
2143 r = visit.popleft()
2144 if r in missing:
2144 if r in missing:
2145 continue
2145 continue
2146 else:
2146 else:
2147 missing.add(r)
2147 missing.add(r)
2148 for p in self.parentrevs(r):
2148 for p in self.parentrevs(r):
2149 if p not in has:
2149 if p not in has:
2150 visit.append(p)
2150 visit.append(p)
2151 missing = list(missing)
2151 missing = list(missing)
2152 missing.sort()
2152 missing.sort()
2153 return has, [self.node(miss) for miss in missing]
2153 return has, [self.node(miss) for miss in missing]
2154
2154
2155 def incrementalmissingrevs(self, common=None):
2155 def incrementalmissingrevs(self, common=None):
2156 """Return an object that can be used to incrementally compute the
2156 """Return an object that can be used to incrementally compute the
2157 revision numbers of the ancestors of arbitrary sets that are not
2157 revision numbers of the ancestors of arbitrary sets that are not
2158 ancestors of common. This is an ancestor.incrementalmissingancestors
2158 ancestors of common. This is an ancestor.incrementalmissingancestors
2159 object.
2159 object.
2160
2160
2161 'common' is a list of revision numbers. If common is not supplied, uses
2161 'common' is a list of revision numbers. If common is not supplied, uses
2162 nullrev.
2162 nullrev.
2163 """
2163 """
2164 if common is None:
2164 if common is None:
2165 common = [nullrev]
2165 common = [nullrev]
2166
2166
2167 if rustancestor is not None and self.index.rust_ext_compat:
2167 if rustancestor is not None and self.index.rust_ext_compat:
2168 return rustancestor.MissingAncestors(self.index, common)
2168 return rustancestor.MissingAncestors(self.index, common)
2169 return ancestor.incrementalmissingancestors(self.parentrevs, common)
2169 return ancestor.incrementalmissingancestors(self.parentrevs, common)
2170
2170
2171 def findmissingrevs(self, common=None, heads=None):
2171 def findmissingrevs(self, common=None, heads=None):
2172 """Return the revision numbers of the ancestors of heads that
2172 """Return the revision numbers of the ancestors of heads that
2173 are not ancestors of common.
2173 are not ancestors of common.
2174
2174
2175 More specifically, return a list of revision numbers corresponding to
2175 More specifically, return a list of revision numbers corresponding to
2176 nodes N such that every N satisfies the following constraints:
2176 nodes N such that every N satisfies the following constraints:
2177
2177
2178 1. N is an ancestor of some node in 'heads'
2178 1. N is an ancestor of some node in 'heads'
2179 2. N is not an ancestor of any node in 'common'
2179 2. N is not an ancestor of any node in 'common'
2180
2180
2181 The list is sorted by revision number, meaning it is
2181 The list is sorted by revision number, meaning it is
2182 topologically sorted.
2182 topologically sorted.
2183
2183
2184 'heads' and 'common' are both lists of revision numbers. If heads is
2184 'heads' and 'common' are both lists of revision numbers. If heads is
2185 not supplied, uses all of the revlog's heads. If common is not
2185 not supplied, uses all of the revlog's heads. If common is not
2186 supplied, uses nullid."""
2186 supplied, uses nullid."""
2187 if common is None:
2187 if common is None:
2188 common = [nullrev]
2188 common = [nullrev]
2189 if heads is None:
2189 if heads is None:
2190 heads = self.headrevs()
2190 heads = self.headrevs()
2191
2191
2192 inc = self.incrementalmissingrevs(common=common)
2192 inc = self.incrementalmissingrevs(common=common)
2193 return inc.missingancestors(heads)
2193 return inc.missingancestors(heads)
2194
2194
2195 def findmissing(self, common=None, heads=None):
2195 def findmissing(self, common=None, heads=None):
2196 """Return the ancestors of heads that are not ancestors of common.
2196 """Return the ancestors of heads that are not ancestors of common.
2197
2197
2198 More specifically, return a list of nodes N such that every N
2198 More specifically, return a list of nodes N such that every N
2199 satisfies the following constraints:
2199 satisfies the following constraints:
2200
2200
2201 1. N is an ancestor of some node in 'heads'
2201 1. N is an ancestor of some node in 'heads'
2202 2. N is not an ancestor of any node in 'common'
2202 2. N is not an ancestor of any node in 'common'
2203
2203
2204 The list is sorted by revision number, meaning it is
2204 The list is sorted by revision number, meaning it is
2205 topologically sorted.
2205 topologically sorted.
2206
2206
2207 'heads' and 'common' are both lists of node IDs. If heads is
2207 'heads' and 'common' are both lists of node IDs. If heads is
2208 not supplied, uses all of the revlog's heads. If common is not
2208 not supplied, uses all of the revlog's heads. If common is not
2209 supplied, uses nullid."""
2209 supplied, uses nullid."""
2210 if common is None:
2210 if common is None:
2211 common = [self.nullid]
2211 common = [self.nullid]
2212 if heads is None:
2212 if heads is None:
2213 heads = self.heads()
2213 heads = self.heads()
2214
2214
2215 common = [self.rev(n) for n in common]
2215 common = [self.rev(n) for n in common]
2216 heads = [self.rev(n) for n in heads]
2216 heads = [self.rev(n) for n in heads]
2217
2217
2218 inc = self.incrementalmissingrevs(common=common)
2218 inc = self.incrementalmissingrevs(common=common)
2219 return [self.node(r) for r in inc.missingancestors(heads)]
2219 return [self.node(r) for r in inc.missingancestors(heads)]
2220
2220
2221 def nodesbetween(self, roots=None, heads=None):
2221 def nodesbetween(self, roots=None, heads=None):
2222 """Return a topological path from 'roots' to 'heads'.
2222 """Return a topological path from 'roots' to 'heads'.
2223
2223
2224 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2224 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2225 topologically sorted list of all nodes N that satisfy both of
2225 topologically sorted list of all nodes N that satisfy both of
2226 these constraints:
2226 these constraints:
2227
2227
2228 1. N is a descendant of some node in 'roots'
2228 1. N is a descendant of some node in 'roots'
2229 2. N is an ancestor of some node in 'heads'
2229 2. N is an ancestor of some node in 'heads'
2230
2230
2231 Every node is considered to be both a descendant and an ancestor
2231 Every node is considered to be both a descendant and an ancestor
2232 of itself, so every reachable node in 'roots' and 'heads' will be
2232 of itself, so every reachable node in 'roots' and 'heads' will be
2233 included in 'nodes'.
2233 included in 'nodes'.
2234
2234
2235 'outroots' is the list of reachable nodes in 'roots', i.e., the
2235 'outroots' is the list of reachable nodes in 'roots', i.e., the
2236 subset of 'roots' that is returned in 'nodes'. Likewise,
2236 subset of 'roots' that is returned in 'nodes'. Likewise,
2237 'outheads' is the subset of 'heads' that is also in 'nodes'.
2237 'outheads' is the subset of 'heads' that is also in 'nodes'.
2238
2238
2239 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2239 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2240 unspecified, uses nullid as the only root. If 'heads' is
2240 unspecified, uses nullid as the only root. If 'heads' is
2241 unspecified, uses list of all of the revlog's heads."""
2241 unspecified, uses list of all of the revlog's heads."""
2242 nonodes = ([], [], [])
2242 nonodes = ([], [], [])
2243 if roots is not None:
2243 if roots is not None:
2244 roots = list(roots)
2244 roots = list(roots)
2245 if not roots:
2245 if not roots:
2246 return nonodes
2246 return nonodes
2247 lowestrev = min([self.rev(n) for n in roots])
2247 lowestrev = min([self.rev(n) for n in roots])
2248 else:
2248 else:
2249 roots = [self.nullid] # Everybody's a descendant of nullid
2249 roots = [self.nullid] # Everybody's a descendant of nullid
2250 lowestrev = nullrev
2250 lowestrev = nullrev
2251 if (lowestrev == nullrev) and (heads is None):
2251 if (lowestrev == nullrev) and (heads is None):
2252 # We want _all_ the nodes!
2252 # We want _all_ the nodes!
2253 return (
2253 return (
2254 [self.node(r) for r in self],
2254 [self.node(r) for r in self],
2255 [self.nullid],
2255 [self.nullid],
2256 list(self.heads()),
2256 list(self.heads()),
2257 )
2257 )
2258 if heads is None:
2258 if heads is None:
2259 # All nodes are ancestors, so the latest ancestor is the last
2259 # All nodes are ancestors, so the latest ancestor is the last
2260 # node.
2260 # node.
2261 highestrev = len(self) - 1
2261 highestrev = len(self) - 1
2262 # Set ancestors to None to signal that every node is an ancestor.
2262 # Set ancestors to None to signal that every node is an ancestor.
2263 ancestors = None
2263 ancestors = None
2264 # Set heads to an empty dictionary for later discovery of heads
2264 # Set heads to an empty dictionary for later discovery of heads
2265 heads = {}
2265 heads = {}
2266 else:
2266 else:
2267 heads = list(heads)
2267 heads = list(heads)
2268 if not heads:
2268 if not heads:
2269 return nonodes
2269 return nonodes
2270 ancestors = set()
2270 ancestors = set()
2271 # Turn heads into a dictionary so we can remove 'fake' heads.
2271 # Turn heads into a dictionary so we can remove 'fake' heads.
2272 # Also, later we will be using it to filter out the heads we can't
2272 # Also, later we will be using it to filter out the heads we can't
2273 # find from roots.
2273 # find from roots.
2274 heads = dict.fromkeys(heads, False)
2274 heads = dict.fromkeys(heads, False)
2275 # Start at the top and keep marking parents until we're done.
2275 # Start at the top and keep marking parents until we're done.
2276 nodestotag = set(heads)
2276 nodestotag = set(heads)
2277 # Remember where the top was so we can use it as a limit later.
2277 # Remember where the top was so we can use it as a limit later.
2278 highestrev = max([self.rev(n) for n in nodestotag])
2278 highestrev = max([self.rev(n) for n in nodestotag])
2279 while nodestotag:
2279 while nodestotag:
2280 # grab a node to tag
2280 # grab a node to tag
2281 n = nodestotag.pop()
2281 n = nodestotag.pop()
2282 # Never tag nullid
2282 # Never tag nullid
2283 if n == self.nullid:
2283 if n == self.nullid:
2284 continue
2284 continue
2285 # A node's revision number represents its place in a
2285 # A node's revision number represents its place in a
2286 # topologically sorted list of nodes.
2286 # topologically sorted list of nodes.
2287 r = self.rev(n)
2287 r = self.rev(n)
2288 if r >= lowestrev:
2288 if r >= lowestrev:
2289 if n not in ancestors:
2289 if n not in ancestors:
2290 # If we are possibly a descendant of one of the roots
2290 # If we are possibly a descendant of one of the roots
2291 # and we haven't already been marked as an ancestor
2291 # and we haven't already been marked as an ancestor
2292 ancestors.add(n) # Mark as ancestor
2292 ancestors.add(n) # Mark as ancestor
2293 # Add non-nullid parents to list of nodes to tag.
2293 # Add non-nullid parents to list of nodes to tag.
2294 nodestotag.update(
2294 nodestotag.update(
2295 [p for p in self.parents(n) if p != self.nullid]
2295 [p for p in self.parents(n) if p != self.nullid]
2296 )
2296 )
2297 elif n in heads: # We've seen it before, is it a fake head?
2297 elif n in heads: # We've seen it before, is it a fake head?
2298 # So it is, real heads should not be the ancestors of
2298 # So it is, real heads should not be the ancestors of
2299 # any other heads.
2299 # any other heads.
2300 heads.pop(n)
2300 heads.pop(n)
2301 if not ancestors:
2301 if not ancestors:
2302 return nonodes
2302 return nonodes
2303 # Now that we have our set of ancestors, we want to remove any
2303 # Now that we have our set of ancestors, we want to remove any
2304 # roots that are not ancestors.
2304 # roots that are not ancestors.
2305
2305
2306 # If one of the roots was nullid, everything is included anyway.
2306 # If one of the roots was nullid, everything is included anyway.
2307 if lowestrev > nullrev:
2307 if lowestrev > nullrev:
2308 # But, since we weren't, let's recompute the lowest rev to not
2308 # But, since we weren't, let's recompute the lowest rev to not
2309 # include roots that aren't ancestors.
2309 # include roots that aren't ancestors.
2310
2310
2311 # Filter out roots that aren't ancestors of heads
2311 # Filter out roots that aren't ancestors of heads
2312 roots = [root for root in roots if root in ancestors]
2312 roots = [root for root in roots if root in ancestors]
2313 # Recompute the lowest revision
2313 # Recompute the lowest revision
2314 if roots:
2314 if roots:
2315 lowestrev = min([self.rev(root) for root in roots])
2315 lowestrev = min([self.rev(root) for root in roots])
2316 else:
2316 else:
2317 # No more roots? Return empty list
2317 # No more roots? Return empty list
2318 return nonodes
2318 return nonodes
2319 else:
2319 else:
2320 # We are descending from nullid, and don't need to care about
2320 # We are descending from nullid, and don't need to care about
2321 # any other roots.
2321 # any other roots.
2322 lowestrev = nullrev
2322 lowestrev = nullrev
2323 roots = [self.nullid]
2323 roots = [self.nullid]
2324 # Transform our roots list into a set.
2324 # Transform our roots list into a set.
2325 descendants = set(roots)
2325 descendants = set(roots)
2326 # Also, keep the original roots so we can filter out roots that aren't
2326 # Also, keep the original roots so we can filter out roots that aren't
2327 # 'real' roots (i.e. are descended from other roots).
2327 # 'real' roots (i.e. are descended from other roots).
2328 roots = descendants.copy()
2328 roots = descendants.copy()
2329 # Our topologically sorted list of output nodes.
2329 # Our topologically sorted list of output nodes.
2330 orderedout = []
2330 orderedout = []
2331 # Don't start at nullid since we don't want nullid in our output list,
2331 # Don't start at nullid since we don't want nullid in our output list,
2332 # and if nullid shows up in descendants, empty parents will look like
2332 # and if nullid shows up in descendants, empty parents will look like
2333 # they're descendants.
2333 # they're descendants.
2334 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2334 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2335 n = self.node(r)
2335 n = self.node(r)
2336 isdescendant = False
2336 isdescendant = False
2337 if lowestrev == nullrev: # Everybody is a descendant of nullid
2337 if lowestrev == nullrev: # Everybody is a descendant of nullid
2338 isdescendant = True
2338 isdescendant = True
2339 elif n in descendants:
2339 elif n in descendants:
2340 # n is already a descendant
2340 # n is already a descendant
2341 isdescendant = True
2341 isdescendant = True
2342 # This check only needs to be done here because all the roots
2342 # This check only needs to be done here because all the roots
2343 # will start being marked is descendants before the loop.
2343 # will start being marked is descendants before the loop.
2344 if n in roots:
2344 if n in roots:
2345 # If n was a root, check if it's a 'real' root.
2345 # If n was a root, check if it's a 'real' root.
2346 p = tuple(self.parents(n))
2346 p = tuple(self.parents(n))
2347 # If any of its parents are descendants, it's not a root.
2347 # If any of its parents are descendants, it's not a root.
2348 if (p[0] in descendants) or (p[1] in descendants):
2348 if (p[0] in descendants) or (p[1] in descendants):
2349 roots.remove(n)
2349 roots.remove(n)
2350 else:
2350 else:
2351 p = tuple(self.parents(n))
2351 p = tuple(self.parents(n))
2352 # A node is a descendant if either of its parents are
2352 # A node is a descendant if either of its parents are
2353 # descendants. (We seeded the dependents list with the roots
2353 # descendants. (We seeded the dependents list with the roots
2354 # up there, remember?)
2354 # up there, remember?)
2355 if (p[0] in descendants) or (p[1] in descendants):
2355 if (p[0] in descendants) or (p[1] in descendants):
2356 descendants.add(n)
2356 descendants.add(n)
2357 isdescendant = True
2357 isdescendant = True
2358 if isdescendant and ((ancestors is None) or (n in ancestors)):
2358 if isdescendant and ((ancestors is None) or (n in ancestors)):
2359 # Only include nodes that are both descendants and ancestors.
2359 # Only include nodes that are both descendants and ancestors.
2360 orderedout.append(n)
2360 orderedout.append(n)
2361 if (ancestors is not None) and (n in heads):
2361 if (ancestors is not None) and (n in heads):
2362 # We're trying to figure out which heads are reachable
2362 # We're trying to figure out which heads are reachable
2363 # from roots.
2363 # from roots.
2364 # Mark this head as having been reached
2364 # Mark this head as having been reached
2365 heads[n] = True
2365 heads[n] = True
2366 elif ancestors is None:
2366 elif ancestors is None:
2367 # Otherwise, we're trying to discover the heads.
2367 # Otherwise, we're trying to discover the heads.
2368 # Assume this is a head because if it isn't, the next step
2368 # Assume this is a head because if it isn't, the next step
2369 # will eventually remove it.
2369 # will eventually remove it.
2370 heads[n] = True
2370 heads[n] = True
2371 # But, obviously its parents aren't.
2371 # But, obviously its parents aren't.
2372 for p in self.parents(n):
2372 for p in self.parents(n):
2373 heads.pop(p, None)
2373 heads.pop(p, None)
2374 heads = [head for head, flag in heads.items() if flag]
2374 heads = [head for head, flag in heads.items() if flag]
2375 roots = list(roots)
2375 roots = list(roots)
2376 assert orderedout
2376 assert orderedout
2377 assert roots
2377 assert roots
2378 assert heads
2378 assert heads
2379 return (orderedout, roots, heads)
2379 return (orderedout, roots, heads)
2380
2380
2381 def headrevs(self, revs=None):
2381 def headrevs(self, revs=None):
2382 if revs is None:
2382 if revs is None:
2383 try:
2383 try:
2384 return self.index.headrevs()
2384 return self.index.headrevs()
2385 except AttributeError:
2385 except AttributeError:
2386 return self._headrevs()
2386 return self._headrevs()
2387 if rustdagop is not None and self.index.rust_ext_compat:
2387 if rustdagop is not None and self.index.rust_ext_compat:
2388 return rustdagop.headrevs(self.index, revs)
2388 return rustdagop.headrevs(self.index, revs)
2389 return dagop.headrevs(revs, self._uncheckedparentrevs)
2389 return dagop.headrevs(revs, self._uncheckedparentrevs)
2390
2390
2391 def headrevsdiff(self, start, stop):
2391 def headrevsdiff(self, start, stop):
2392 try:
2392 try:
2393 return self.index.headrevsdiff(start, stop)
2393 return self.index.headrevsdiff(start, stop)
2394 except AttributeError:
2394 except AttributeError:
2395 return dagop.headrevsdiff(self._uncheckedparentrevs, start, stop)
2395 return dagop.headrevsdiff(self._uncheckedparentrevs, start, stop)
2396
2396
2397 def computephases(self, roots):
2397 def computephases(self, roots):
2398 return self.index.computephasesmapsets(roots)
2398 return self.index.computephasesmapsets(roots)
2399
2399
2400 def _headrevs(self):
2400 def _headrevs(self):
2401 count = len(self)
2401 count = len(self)
2402 if not count:
2402 if not count:
2403 return [nullrev]
2403 return [nullrev]
2404 # we won't iter over filtered rev so nobody is a head at start
2404 # we won't iter over filtered rev so nobody is a head at start
2405 ishead = [0] * (count + 1)
2405 ishead = [0] * (count + 1)
2406 index = self.index
2406 index = self.index
2407 for r in self:
2407 for r in self:
2408 ishead[r] = 1 # I may be an head
2408 ishead[r] = 1 # I may be an head
2409 e = index[r]
2409 e = index[r]
2410 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
2410 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
2411 return [r for r, val in enumerate(ishead) if val]
2411 return [r for r, val in enumerate(ishead) if val]
2412
2412
2413 def _head_node_ids(self):
2413 def _head_node_ids(self):
2414 try:
2414 try:
2415 return self.index.head_node_ids()
2415 return self.index.head_node_ids()
2416 except AttributeError:
2416 except AttributeError:
2417 return [self.node(r) for r in self.headrevs()]
2417 return [self.node(r) for r in self.headrevs()]
2418
2418
2419 def heads(self, start=None, stop=None):
2419 def heads(self, start=None, stop=None):
2420 """return the list of all nodes that have no children
2420 """return the list of all nodes that have no children
2421
2421
2422 if start is specified, only heads that are descendants of
2422 if start is specified, only heads that are descendants of
2423 start will be returned
2423 start will be returned
2424 if stop is specified, it will consider all the revs from stop
2424 if stop is specified, it will consider all the revs from stop
2425 as if they had no children
2425 as if they had no children
2426 """
2426 """
2427 if start is None and stop is None:
2427 if start is None and stop is None:
2428 if not len(self):
2428 if not len(self):
2429 return [self.nullid]
2429 return [self.nullid]
2430 return self._head_node_ids()
2430 return self._head_node_ids()
2431 if start is None:
2431 if start is None:
2432 start = nullrev
2432 start = nullrev
2433 else:
2433 else:
2434 start = self.rev(start)
2434 start = self.rev(start)
2435
2435
2436 stoprevs = {self.rev(n) for n in stop or []}
2436 stoprevs = {self.rev(n) for n in stop or []}
2437
2437
2438 revs = dagop.headrevssubset(
2438 revs = dagop.headrevssubset(
2439 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2439 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2440 )
2440 )
2441
2441
2442 return [self.node(rev) for rev in revs]
2442 return [self.node(rev) for rev in revs]
2443
2443
2444 def diffheads(self, start, stop):
2444 def diffheads(self, start, stop):
2445 """return the nodes that make up the difference between
2445 """return the nodes that make up the difference between
2446 heads of revs before `start` and heads of revs before `stop`"""
2446 heads of revs before `start` and heads of revs before `stop`"""
2447 removed, added = self.headrevsdiff(start, stop)
2447 removed, added = self.headrevsdiff(start, stop)
2448 return [self.node(r) for r in removed], [self.node(r) for r in added]
2448 return [self.node(r) for r in removed], [self.node(r) for r in added]
2449
2449
2450 def children(self, node):
2450 def children(self, node):
2451 """find the children of a given node"""
2451 """find the children of a given node"""
2452 c = []
2452 c = []
2453 p = self.rev(node)
2453 p = self.rev(node)
2454 for r in self.revs(start=p + 1):
2454 for r in self.revs(start=p + 1):
2455 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2455 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2456 if prevs:
2456 if prevs:
2457 for pr in prevs:
2457 for pr in prevs:
2458 if pr == p:
2458 if pr == p:
2459 c.append(self.node(r))
2459 c.append(self.node(r))
2460 elif p == nullrev:
2460 elif p == nullrev:
2461 c.append(self.node(r))
2461 c.append(self.node(r))
2462 return c
2462 return c
2463
2463
2464 def commonancestorsheads(self, a, b):
2464 def commonancestorsheads(self, a, b):
2465 """calculate all the heads of the common ancestors of nodes a and b"""
2465 """calculate all the heads of the common ancestors of nodes a and b"""
2466 a, b = self.rev(a), self.rev(b)
2466 a, b = self.rev(a), self.rev(b)
2467 ancs = self._commonancestorsheads(a, b)
2467 ancs = self._commonancestorsheads(a, b)
2468 return pycompat.maplist(self.node, ancs)
2468 return pycompat.maplist(self.node, ancs)
2469
2469
2470 def _commonancestorsheads(self, *revs):
2470 def _commonancestorsheads(self, *revs):
2471 """calculate all the heads of the common ancestors of revs"""
2471 """calculate all the heads of the common ancestors of revs"""
2472 try:
2472 try:
2473 ancs = self.index.commonancestorsheads(*revs)
2473 ancs = self.index.commonancestorsheads(*revs)
2474 except (AttributeError, OverflowError): # C implementation failed
2474 except (AttributeError, OverflowError): # C implementation failed
2475 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2475 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2476 return ancs
2476 return ancs
2477
2477
2478 def isancestor(self, a, b):
2478 def isancestor(self, a, b):
2479 """return True if node a is an ancestor of node b
2479 """return True if node a is an ancestor of node b
2480
2480
2481 A revision is considered an ancestor of itself."""
2481 A revision is considered an ancestor of itself."""
2482 a, b = self.rev(a), self.rev(b)
2482 a, b = self.rev(a), self.rev(b)
2483 return self.isancestorrev(a, b)
2483 return self.isancestorrev(a, b)
2484
2484
2485 def isancestorrev(self, a, b):
2485 def isancestorrev(self, a, b):
2486 """return True if revision a is an ancestor of revision b
2486 """return True if revision a is an ancestor of revision b
2487
2487
2488 A revision is considered an ancestor of itself.
2488 A revision is considered an ancestor of itself.
2489
2489
2490 The implementation of this is trivial but the use of
2490 The implementation of this is trivial but the use of
2491 reachableroots is not."""
2491 reachableroots is not."""
2492 if a == nullrev:
2492 if a == nullrev:
2493 return True
2493 return True
2494 elif a == b:
2494 elif a == b:
2495 return True
2495 return True
2496 elif a > b:
2496 elif a > b:
2497 return False
2497 return False
2498 return bool(self.reachableroots(a, [b], [a], includepath=False))
2498 return bool(self.reachableroots(a, [b], [a], includepath=False))
2499
2499
2500 def reachableroots(self, minroot, heads, roots, includepath=False):
2500 def reachableroots(self, minroot, heads, roots, includepath=False):
2501 """return (heads(::(<roots> and <roots>::<heads>)))
2501 """return (heads(::(<roots> and <roots>::<heads>)))
2502
2502
2503 If includepath is True, return (<roots>::<heads>)."""
2503 If includepath is True, return (<roots>::<heads>)."""
2504 try:
2504 try:
2505 return self.index.reachableroots2(
2505 return self.index.reachableroots2(
2506 minroot, heads, roots, includepath
2506 minroot, heads, roots, includepath
2507 )
2507 )
2508 except AttributeError:
2508 except AttributeError:
2509 return dagop._reachablerootspure(
2509 return dagop._reachablerootspure(
2510 self.parentrevs, minroot, roots, heads, includepath
2510 self.parentrevs, minroot, roots, heads, includepath
2511 )
2511 )
2512
2512
2513 def ancestor(self, a, b):
2513 def ancestor(self, a, b):
2514 """calculate the "best" common ancestor of nodes a and b"""
2514 """calculate the "best" common ancestor of nodes a and b"""
2515
2515
2516 a, b = self.rev(a), self.rev(b)
2516 a, b = self.rev(a), self.rev(b)
2517 try:
2517 try:
2518 ancs = self.index.ancestors(a, b)
2518 ancs = self.index.ancestors(a, b)
2519 except (AttributeError, OverflowError):
2519 except (AttributeError, OverflowError):
2520 ancs = ancestor.ancestors(self.parentrevs, a, b)
2520 ancs = ancestor.ancestors(self.parentrevs, a, b)
2521 if ancs:
2521 if ancs:
2522 # choose a consistent winner when there's a tie
2522 # choose a consistent winner when there's a tie
2523 return min(map(self.node, ancs))
2523 return min(map(self.node, ancs))
2524 return self.nullid
2524 return self.nullid
2525
2525
2526 def _match(self, id):
2526 def _match(self, id):
2527 if isinstance(id, int):
2527 if isinstance(id, int):
2528 # rev
2528 # rev
2529 return self.node(id)
2529 return self.node(id)
2530 if len(id) == self.nodeconstants.nodelen:
2530 if len(id) == self.nodeconstants.nodelen:
2531 # possibly a binary node
2531 # possibly a binary node
2532 # odds of a binary node being all hex in ASCII are 1 in 10**25
2532 # odds of a binary node being all hex in ASCII are 1 in 10**25
2533 try:
2533 try:
2534 node = id
2534 node = id
2535 self.rev(node) # quick search the index
2535 self.rev(node) # quick search the index
2536 return node
2536 return node
2537 except error.LookupError:
2537 except error.LookupError:
2538 pass # may be partial hex id
2538 pass # may be partial hex id
2539 try:
2539 try:
2540 # str(rev)
2540 # str(rev)
2541 rev = int(id)
2541 rev = int(id)
2542 if b"%d" % rev != id:
2542 if b"%d" % rev != id:
2543 raise ValueError
2543 raise ValueError
2544 if rev < 0:
2544 if rev < 0:
2545 rev = len(self) + rev
2545 rev = len(self) + rev
2546 if rev < 0 or rev >= len(self):
2546 if rev < 0 or rev >= len(self):
2547 raise ValueError
2547 raise ValueError
2548 return self.node(rev)
2548 return self.node(rev)
2549 except (ValueError, OverflowError):
2549 except (ValueError, OverflowError):
2550 pass
2550 pass
2551 if len(id) == 2 * self.nodeconstants.nodelen:
2551 if len(id) == 2 * self.nodeconstants.nodelen:
2552 try:
2552 try:
2553 # a full hex nodeid?
2553 # a full hex nodeid?
2554 node = bin(id)
2554 node = bin(id)
2555 self.rev(node)
2555 self.rev(node)
2556 return node
2556 return node
2557 except (binascii.Error, error.LookupError):
2557 except (binascii.Error, error.LookupError):
2558 pass
2558 pass
2559
2559
2560 def _partialmatch(self, id):
2560 def _partialmatch(self, id):
2561 # we don't care wdirfilenodeids as they should be always full hash
2561 # we don't care wdirfilenodeids as they should be always full hash
2562 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2562 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2563 ambiguous = False
2563 ambiguous = False
2564 try:
2564 try:
2565 partial = self.index.partialmatch(id)
2565 partial = self.index.partialmatch(id)
2566 if partial and self.hasnode(partial):
2566 if partial and self.hasnode(partial):
2567 if maybewdir:
2567 if maybewdir:
2568 # single 'ff...' match in radix tree, ambiguous with wdir
2568 # single 'ff...' match in radix tree, ambiguous with wdir
2569 ambiguous = True
2569 ambiguous = True
2570 else:
2570 else:
2571 return partial
2571 return partial
2572 elif maybewdir:
2572 elif maybewdir:
2573 # no 'ff...' match in radix tree, wdir identified
2573 # no 'ff...' match in radix tree, wdir identified
2574 raise error.WdirUnsupported
2574 raise error.WdirUnsupported
2575 else:
2575 else:
2576 return None
2576 return None
2577 except error.RevlogError:
2577 except error.RevlogError:
2578 # parsers.c radix tree lookup gave multiple matches
2578 # parsers.c radix tree lookup gave multiple matches
2579 # fast path: for unfiltered changelog, radix tree is accurate
2579 # fast path: for unfiltered changelog, radix tree is accurate
2580 if not getattr(self, 'filteredrevs', None):
2580 if not getattr(self, 'filteredrevs', None):
2581 ambiguous = True
2581 ambiguous = True
2582 # fall through to slow path that filters hidden revisions
2582 # fall through to slow path that filters hidden revisions
2583 except (AttributeError, ValueError):
2583 except (AttributeError, ValueError):
2584 # we are pure python, or key is not hex
2584 # we are pure python, or key is not hex
2585 pass
2585 pass
2586 if ambiguous:
2586 if ambiguous:
2587 raise error.AmbiguousPrefixLookupError(
2587 raise error.AmbiguousPrefixLookupError(
2588 id, self.display_id, _(b'ambiguous identifier')
2588 id, self.display_id, _(b'ambiguous identifier')
2589 )
2589 )
2590
2590
2591 if id in self._pcache:
2591 if id in self._pcache:
2592 return self._pcache[id]
2592 return self._pcache[id]
2593
2593
2594 if len(id) <= 40:
2594 if len(id) <= 40:
2595 # hex(node)[:...]
2595 # hex(node)[:...]
2596 l = len(id) // 2 * 2 # grab an even number of digits
2596 l = len(id) // 2 * 2 # grab an even number of digits
2597 try:
2597 try:
2598 # we're dropping the last digit, so let's check that it's hex,
2598 # we're dropping the last digit, so let's check that it's hex,
2599 # to avoid the expensive computation below if it's not
2599 # to avoid the expensive computation below if it's not
2600 if len(id) % 2 > 0:
2600 if len(id) % 2 > 0:
2601 if not (id[-1] in hexdigits):
2601 if not (id[-1] in hexdigits):
2602 return None
2602 return None
2603 prefix = bin(id[:l])
2603 prefix = bin(id[:l])
2604 except binascii.Error:
2604 except binascii.Error:
2605 pass
2605 pass
2606 else:
2606 else:
2607 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2607 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2608 nl = [
2608 nl = [
2609 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2609 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2610 ]
2610 ]
2611 if self.nodeconstants.nullhex.startswith(id):
2611 if self.nodeconstants.nullhex.startswith(id):
2612 nl.append(self.nullid)
2612 nl.append(self.nullid)
2613 if len(nl) > 0:
2613 if len(nl) > 0:
2614 if len(nl) == 1 and not maybewdir:
2614 if len(nl) == 1 and not maybewdir:
2615 self._pcache[id] = nl[0]
2615 self._pcache[id] = nl[0]
2616 return nl[0]
2616 return nl[0]
2617 raise error.AmbiguousPrefixLookupError(
2617 raise error.AmbiguousPrefixLookupError(
2618 id, self.display_id, _(b'ambiguous identifier')
2618 id, self.display_id, _(b'ambiguous identifier')
2619 )
2619 )
2620 if maybewdir:
2620 if maybewdir:
2621 raise error.WdirUnsupported
2621 raise error.WdirUnsupported
2622 return None
2622 return None
2623
2623
2624 def lookup(self, id):
2624 def lookup(self, id):
2625 """locate a node based on:
2625 """locate a node based on:
2626 - revision number or str(revision number)
2626 - revision number or str(revision number)
2627 - nodeid or subset of hex nodeid
2627 - nodeid or subset of hex nodeid
2628 """
2628 """
2629 n = self._match(id)
2629 n = self._match(id)
2630 if n is not None:
2630 if n is not None:
2631 return n
2631 return n
2632 n = self._partialmatch(id)
2632 n = self._partialmatch(id)
2633 if n:
2633 if n:
2634 return n
2634 return n
2635
2635
2636 raise error.LookupError(id, self.display_id, _(b'no match found'))
2636 raise error.LookupError(id, self.display_id, _(b'no match found'))
2637
2637
2638 def shortest(self, node, minlength=1):
2638 def shortest(self, node, minlength=1):
2639 """Find the shortest unambiguous prefix that matches node."""
2639 """Find the shortest unambiguous prefix that matches node."""
2640
2640
2641 def isvalid(prefix):
2641 def isvalid(prefix):
2642 try:
2642 try:
2643 matchednode = self._partialmatch(prefix)
2643 matchednode = self._partialmatch(prefix)
2644 except error.AmbiguousPrefixLookupError:
2644 except error.AmbiguousPrefixLookupError:
2645 return False
2645 return False
2646 except error.WdirUnsupported:
2646 except error.WdirUnsupported:
2647 # single 'ff...' match
2647 # single 'ff...' match
2648 return True
2648 return True
2649 if matchednode is None:
2649 if matchednode is None:
2650 raise error.LookupError(node, self.display_id, _(b'no node'))
2650 raise error.LookupError(node, self.display_id, _(b'no node'))
2651 return True
2651 return True
2652
2652
2653 def maybewdir(prefix):
2653 def maybewdir(prefix):
2654 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2654 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2655
2655
2656 hexnode = hex(node)
2656 hexnode = hex(node)
2657
2657
2658 def disambiguate(hexnode, minlength):
2658 def disambiguate(hexnode, minlength):
2659 """Disambiguate against wdirid."""
2659 """Disambiguate against wdirid."""
2660 for length in range(minlength, len(hexnode) + 1):
2660 for length in range(minlength, len(hexnode) + 1):
2661 prefix = hexnode[:length]
2661 prefix = hexnode[:length]
2662 if not maybewdir(prefix):
2662 if not maybewdir(prefix):
2663 return prefix
2663 return prefix
2664
2664
2665 if not getattr(self, 'filteredrevs', None):
2665 if not getattr(self, 'filteredrevs', None):
2666 try:
2666 try:
2667 length = max(self.index.shortest(node), minlength)
2667 length = max(self.index.shortest(node), minlength)
2668 return disambiguate(hexnode, length)
2668 return disambiguate(hexnode, length)
2669 except error.RevlogError:
2669 except error.RevlogError:
2670 if node != self.nodeconstants.wdirid:
2670 if node != self.nodeconstants.wdirid:
2671 raise error.LookupError(
2671 raise error.LookupError(
2672 node, self.display_id, _(b'no node')
2672 node, self.display_id, _(b'no node')
2673 )
2673 )
2674 except AttributeError:
2674 except AttributeError:
2675 # Fall through to pure code
2675 # Fall through to pure code
2676 pass
2676 pass
2677
2677
2678 if node == self.nodeconstants.wdirid:
2678 if node == self.nodeconstants.wdirid:
2679 for length in range(minlength, len(hexnode) + 1):
2679 for length in range(minlength, len(hexnode) + 1):
2680 prefix = hexnode[:length]
2680 prefix = hexnode[:length]
2681 if isvalid(prefix):
2681 if isvalid(prefix):
2682 return prefix
2682 return prefix
2683
2683
2684 for length in range(minlength, len(hexnode) + 1):
2684 for length in range(minlength, len(hexnode) + 1):
2685 prefix = hexnode[:length]
2685 prefix = hexnode[:length]
2686 if isvalid(prefix):
2686 if isvalid(prefix):
2687 return disambiguate(hexnode, length)
2687 return disambiguate(hexnode, length)
2688
2688
2689 def cmp(self, node, text):
2689 def cmp(self, node, text):
2690 """compare text with a given file revision
2690 """compare text with a given file revision
2691
2691
2692 returns True if text is different than what is stored.
2692 returns True if text is different than what is stored.
2693 """
2693 """
2694 p1, p2 = self.parents(node)
2694 p1, p2 = self.parents(node)
2695 return storageutil.hashrevisionsha1(text, p1, p2) != node
2695 return storageutil.hashrevisionsha1(text, p1, p2) != node
2696
2696
2697 def deltaparent(self, rev):
2697 def deltaparent(self, rev):
2698 """return deltaparent of the given revision"""
2698 """return deltaparent of the given revision"""
2699 base = self.index[rev][3]
2699 base = self.index[rev][3]
2700 if base == rev:
2700 if base == rev:
2701 return nullrev
2701 return nullrev
2702 elif self.delta_config.general_delta:
2702 elif self.delta_config.general_delta:
2703 return base
2703 return base
2704 else:
2704 else:
2705 return rev - 1
2705 return rev - 1
2706
2706
2707 def issnapshot(self, rev):
2707 def issnapshot(self, rev):
2708 """tells whether rev is a snapshot"""
2708 """tells whether rev is a snapshot"""
2709 ret = self._inner.issnapshot(rev)
2709 ret = self._inner.issnapshot(rev)
2710 self.issnapshot = self._inner.issnapshot
2710 self.issnapshot = self._inner.issnapshot
2711 return ret
2711 return ret
2712
2712
2713 def snapshotdepth(self, rev):
2713 def snapshotdepth(self, rev):
2714 """number of snapshot in the chain before this one"""
2714 """number of snapshot in the chain before this one"""
2715 if not self.issnapshot(rev):
2715 if not self.issnapshot(rev):
2716 raise error.ProgrammingError(b'revision %d not a snapshot')
2716 raise error.ProgrammingError(b'revision %d not a snapshot')
2717 return len(self._inner._deltachain(rev)[0]) - 1
2717 return len(self._inner._deltachain(rev)[0]) - 1
2718
2718
2719 def revdiff(self, rev1, rev2):
2719 def revdiff(self, rev1, rev2):
2720 """return or calculate a delta between two revisions
2720 """return or calculate a delta between two revisions
2721
2721
2722 The delta calculated is in binary form and is intended to be written to
2722 The delta calculated is in binary form and is intended to be written to
2723 revlog data directly. So this function needs raw revision data.
2723 revlog data directly. So this function needs raw revision data.
2724 """
2724 """
2725 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2725 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2726 return bytes(self._inner._chunk(rev2))
2726 return bytes(self._inner._chunk(rev2))
2727
2727
2728 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2728 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2729
2729
2730 def revision(self, nodeorrev):
2730 def revision(self, nodeorrev):
2731 """return an uncompressed revision of a given node or revision
2731 """return an uncompressed revision of a given node or revision
2732 number.
2732 number.
2733 """
2733 """
2734 return self._revisiondata(nodeorrev)
2734 return self._revisiondata(nodeorrev)
2735
2735
2736 def sidedata(self, nodeorrev):
2736 def sidedata(self, nodeorrev):
2737 """a map of extra data related to the changeset but not part of the hash
2737 """a map of extra data related to the changeset but not part of the hash
2738
2738
2739 This function currently return a dictionary. However, more advanced
2739 This function currently return a dictionary. However, more advanced
2740 mapping object will likely be used in the future for a more
2740 mapping object will likely be used in the future for a more
2741 efficient/lazy code.
2741 efficient/lazy code.
2742 """
2742 """
2743 # deal with <nodeorrev> argument type
2743 # deal with <nodeorrev> argument type
2744 if isinstance(nodeorrev, int):
2744 if isinstance(nodeorrev, int):
2745 rev = nodeorrev
2745 rev = nodeorrev
2746 else:
2746 else:
2747 rev = self.rev(nodeorrev)
2747 rev = self.rev(nodeorrev)
2748 return self._sidedata(rev)
2748 return self._sidedata(rev)
2749
2749
2750 def _rawtext(self, node, rev):
2750 def _rawtext(self, node, rev):
2751 """return the possibly unvalidated rawtext for a revision
2751 """return the possibly unvalidated rawtext for a revision
2752
2752
2753 returns (rev, rawtext, validated)
2753 returns (rev, rawtext, validated)
2754 """
2754 """
2755 # Check if we have the entry in cache
2755 # Check if we have the entry in cache
2756 # The cache entry looks like (node, rev, rawtext)
2756 # The cache entry looks like (node, rev, rawtext)
2757 if self._inner._revisioncache:
2757 if self._inner._revisioncache:
2758 if self._inner._revisioncache[0] == node:
2758 if self._inner._revisioncache[0] == node:
2759 return (rev, self._inner._revisioncache[2], True)
2759 return (rev, self._inner._revisioncache[2], True)
2760
2760
2761 if rev is None:
2761 if rev is None:
2762 rev = self.rev(node)
2762 rev = self.rev(node)
2763
2763
2764 text = self._inner.raw_text(node, rev)
2764 text = self._inner.raw_text(node, rev)
2765 return (rev, text, False)
2765 return (rev, text, False)
2766
2766
2767 def _revisiondata(self, nodeorrev, raw=False):
2767 def _revisiondata(self, nodeorrev, raw=False):
2768 # deal with <nodeorrev> argument type
2768 # deal with <nodeorrev> argument type
2769 if isinstance(nodeorrev, int):
2769 if isinstance(nodeorrev, int):
2770 rev = nodeorrev
2770 rev = nodeorrev
2771 node = self.node(rev)
2771 node = self.node(rev)
2772 else:
2772 else:
2773 node = nodeorrev
2773 node = nodeorrev
2774 rev = None
2774 rev = None
2775
2775
2776 # fast path the special `nullid` rev
2776 # fast path the special `nullid` rev
2777 if node == self.nullid:
2777 if node == self.nullid:
2778 return b""
2778 return b""
2779
2779
2780 # ``rawtext`` is the text as stored inside the revlog. Might be the
2780 # ``rawtext`` is the text as stored inside the revlog. Might be the
2781 # revision or might need to be processed to retrieve the revision.
2781 # revision or might need to be processed to retrieve the revision.
2782 rev, rawtext, validated = self._rawtext(node, rev)
2782 rev, rawtext, validated = self._rawtext(node, rev)
2783
2783
2784 if raw and validated:
2784 if raw and validated:
2785 # if we don't want to process the raw text and that raw
2785 # if we don't want to process the raw text and that raw
2786 # text is cached, we can exit early.
2786 # text is cached, we can exit early.
2787 return rawtext
2787 return rawtext
2788 if rev is None:
2788 if rev is None:
2789 rev = self.rev(node)
2789 rev = self.rev(node)
2790 # the revlog's flag for this revision
2790 # the revlog's flag for this revision
2791 # (usually alter its state or content)
2791 # (usually alter its state or content)
2792 flags = self.flags(rev)
2792 flags = self.flags(rev)
2793
2793
2794 if validated and flags == REVIDX_DEFAULT_FLAGS:
2794 if validated and flags == REVIDX_DEFAULT_FLAGS:
2795 # no extra flags set, no flag processor runs, text = rawtext
2795 # no extra flags set, no flag processor runs, text = rawtext
2796 return rawtext
2796 return rawtext
2797
2797
2798 if raw:
2798 if raw:
2799 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2799 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2800 text = rawtext
2800 text = rawtext
2801 else:
2801 else:
2802 r = flagutil.processflagsread(self, rawtext, flags)
2802 r = flagutil.processflagsread(self, rawtext, flags)
2803 text, validatehash = r
2803 text, validatehash = r
2804 if validatehash:
2804 if validatehash:
2805 self.checkhash(text, node, rev=rev)
2805 self.checkhash(text, node, rev=rev)
2806 if not validated:
2806 if not validated:
2807 self._inner._revisioncache = (node, rev, rawtext)
2807 self._inner._revisioncache = (node, rev, rawtext)
2808
2808
2809 return text
2809 return text
2810
2810
2811 def _sidedata(self, rev):
2811 def _sidedata(self, rev):
2812 """Return the sidedata for a given revision number."""
2812 """Return the sidedata for a given revision number."""
2813 if self._sidedatafile is None:
2813 if self._sidedatafile is None:
2814 return {}
2814 return {}
2815 sidedata_end = None
2815 sidedata_end = None
2816 if self._docket is not None:
2816 if self._docket is not None:
2817 sidedata_end = self._docket.sidedata_end
2817 sidedata_end = self._docket.sidedata_end
2818 return self._inner.sidedata(rev, sidedata_end)
2818 return self._inner.sidedata(rev, sidedata_end)
2819
2819
2820 def rawdata(self, nodeorrev):
2820 def rawdata(self, nodeorrev):
2821 """return an uncompressed raw data of a given node or revision number."""
2821 """return an uncompressed raw data of a given node or revision number."""
2822 return self._revisiondata(nodeorrev, raw=True)
2822 return self._revisiondata(nodeorrev, raw=True)
2823
2823
2824 def hash(self, text, p1, p2):
2824 def hash(self, text, p1, p2):
2825 """Compute a node hash.
2825 """Compute a node hash.
2826
2826
2827 Available as a function so that subclasses can replace the hash
2827 Available as a function so that subclasses can replace the hash
2828 as needed.
2828 as needed.
2829 """
2829 """
2830 return storageutil.hashrevisionsha1(text, p1, p2)
2830 return storageutil.hashrevisionsha1(text, p1, p2)
2831
2831
2832 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2832 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2833 """Check node hash integrity.
2833 """Check node hash integrity.
2834
2834
2835 Available as a function so that subclasses can extend hash mismatch
2835 Available as a function so that subclasses can extend hash mismatch
2836 behaviors as needed.
2836 behaviors as needed.
2837 """
2837 """
2838 try:
2838 try:
2839 if p1 is None and p2 is None:
2839 if p1 is None and p2 is None:
2840 p1, p2 = self.parents(node)
2840 p1, p2 = self.parents(node)
2841 if node != self.hash(text, p1, p2):
2841 if node != self.hash(text, p1, p2):
2842 # Clear the revision cache on hash failure. The revision cache
2842 # Clear the revision cache on hash failure. The revision cache
2843 # only stores the raw revision and clearing the cache does have
2843 # only stores the raw revision and clearing the cache does have
2844 # the side-effect that we won't have a cache hit when the raw
2844 # the side-effect that we won't have a cache hit when the raw
2845 # revision data is accessed. But this case should be rare and
2845 # revision data is accessed. But this case should be rare and
2846 # it is extra work to teach the cache about the hash
2846 # it is extra work to teach the cache about the hash
2847 # verification state.
2847 # verification state.
2848 if (
2848 if (
2849 self._inner._revisioncache
2849 self._inner._revisioncache
2850 and self._inner._revisioncache[0] == node
2850 and self._inner._revisioncache[0] == node
2851 ):
2851 ):
2852 self._inner._revisioncache = None
2852 self._inner._revisioncache = None
2853
2853
2854 revornode = rev
2854 revornode = rev
2855 if revornode is None:
2855 if revornode is None:
2856 revornode = templatefilters.short(hex(node))
2856 revornode = templatefilters.short(hex(node))
2857 raise error.RevlogError(
2857 raise error.RevlogError(
2858 _(b"integrity check failed on %s:%s")
2858 _(b"integrity check failed on %s:%s")
2859 % (self.display_id, pycompat.bytestr(revornode))
2859 % (self.display_id, pycompat.bytestr(revornode))
2860 )
2860 )
2861 except error.RevlogError:
2861 except error.RevlogError:
2862 if self.feature_config.censorable and storageutil.iscensoredtext(
2862 if self.feature_config.censorable and storageutil.iscensoredtext(
2863 text
2863 text
2864 ):
2864 ):
2865 raise error.CensoredNodeError(self.display_id, node, text)
2865 raise error.CensoredNodeError(self.display_id, node, text)
2866 raise
2866 raise
2867
2867
2868 @property
2868 @property
2869 def _split_index_file(self):
2869 def _split_index_file(self):
2870 """the path where to expect the index of an ongoing splitting operation
2870 """the path where to expect the index of an ongoing splitting operation
2871
2871
2872 The file will only exist if a splitting operation is in progress, but
2872 The file will only exist if a splitting operation is in progress, but
2873 it is always expected at the same location."""
2873 it is always expected at the same location."""
2874 parts = self.radix.split(b'/')
2874 parts = self.radix.split(b'/')
2875 if len(parts) > 1:
2875 if len(parts) > 1:
2876 # adds a '-s' prefix to the ``data/` or `meta/` base
2876 # adds a '-s' prefix to the ``data/` or `meta/` base
2877 head = parts[0] + b'-s'
2877 head = parts[0] + b'-s'
2878 mids = parts[1:-1]
2878 mids = parts[1:-1]
2879 tail = parts[-1] + b'.i'
2879 tail = parts[-1] + b'.i'
2880 pieces = [head] + mids + [tail]
2880 pieces = [head] + mids + [tail]
2881 return b'/'.join(pieces)
2881 return b'/'.join(pieces)
2882 else:
2882 else:
2883 # the revlog is stored at the root of the store (changelog or
2883 # the revlog is stored at the root of the store (changelog or
2884 # manifest), no risk of collision.
2884 # manifest), no risk of collision.
2885 return self.radix + b'.i.s'
2885 return self.radix + b'.i.s'
2886
2886
2887 def _enforceinlinesize(self, tr):
2887 def _enforceinlinesize(self, tr):
2888 """Check if the revlog is too big for inline and convert if so.
2888 """Check if the revlog is too big for inline and convert if so.
2889
2889
2890 This should be called after revisions are added to the revlog. If the
2890 This should be called after revisions are added to the revlog. If the
2891 revlog has grown too large to be an inline revlog, it will convert it
2891 revlog has grown too large to be an inline revlog, it will convert it
2892 to use multiple index and data files.
2892 to use multiple index and data files.
2893 """
2893 """
2894 tiprev = len(self) - 1
2894 tiprev = len(self) - 1
2895 total_size = self.start(tiprev) + self.length(tiprev)
2895 total_size = self.start(tiprev) + self.length(tiprev)
2896 if not self._inline or (self._may_inline and total_size < _maxinline):
2896 if not self._inline or (self._may_inline and total_size < _maxinline):
2897 return
2897 return
2898
2898
2899 if self._docket is not None:
2899 if self._docket is not None:
2900 msg = b"inline revlog should not have a docket"
2900 msg = b"inline revlog should not have a docket"
2901 raise error.ProgrammingError(msg)
2901 raise error.ProgrammingError(msg)
2902
2902
2903 # In the common case, we enforce inline size because the revlog has
2903 # In the common case, we enforce inline size because the revlog has
2904 # been appened too. And in such case, it must have an initial offset
2904 # been appened too. And in such case, it must have an initial offset
2905 # recorded in the transaction.
2905 # recorded in the transaction.
2906 troffset = tr.findoffset(self._inner.canonical_index_file)
2906 troffset = tr.findoffset(self._inner.canonical_index_file)
2907 pre_touched = troffset is not None
2907 pre_touched = troffset is not None
2908 if not pre_touched and self.target[0] != KIND_CHANGELOG:
2908 if not pre_touched and self.target[0] != KIND_CHANGELOG:
2909 raise error.RevlogError(
2909 raise error.RevlogError(
2910 _(b"%s not found in the transaction") % self._indexfile
2910 _(b"%s not found in the transaction") % self._indexfile
2911 )
2911 )
2912
2912
2913 tr.addbackup(self._inner.canonical_index_file, for_offset=pre_touched)
2913 tr.addbackup(self._inner.canonical_index_file, for_offset=pre_touched)
2914 tr.add(self._datafile, 0)
2914 tr.add(self._datafile, 0)
2915
2915
2916 new_index_file_path = None
2916 new_index_file_path = None
2917 old_index_file_path = self._indexfile
2917 old_index_file_path = self._indexfile
2918 new_index_file_path = self._split_index_file
2918 new_index_file_path = self._split_index_file
2919 opener = self.opener
2919 opener = self.opener
2920 weak_self = weakref.ref(self)
2920 weak_self = weakref.ref(self)
2921
2921
2922 # the "split" index replace the real index when the transaction is
2922 # the "split" index replace the real index when the transaction is
2923 # finalized
2923 # finalized
2924 def finalize_callback(tr):
2924 def finalize_callback(tr):
2925 opener.rename(
2925 opener.rename(
2926 new_index_file_path,
2926 new_index_file_path,
2927 old_index_file_path,
2927 old_index_file_path,
2928 checkambig=True,
2928 checkambig=True,
2929 )
2929 )
2930 maybe_self = weak_self()
2930 maybe_self = weak_self()
2931 if maybe_self is not None:
2931 if maybe_self is not None:
2932 maybe_self._indexfile = old_index_file_path
2932 maybe_self._indexfile = old_index_file_path
2933 maybe_self._inner.index_file = maybe_self._indexfile
2933 maybe_self._inner.index_file = maybe_self._indexfile
2934
2934
2935 def abort_callback(tr):
2935 def abort_callback(tr):
2936 maybe_self = weak_self()
2936 maybe_self = weak_self()
2937 if maybe_self is not None:
2937 if maybe_self is not None:
2938 maybe_self._indexfile = old_index_file_path
2938 maybe_self._indexfile = old_index_file_path
2939 maybe_self._inner.inline = True
2939 maybe_self._inner.inline = True
2940 maybe_self._inner.index_file = old_index_file_path
2940 maybe_self._inner.index_file = old_index_file_path
2941
2941
2942 tr.registertmp(new_index_file_path)
2942 tr.registertmp(new_index_file_path)
2943 # we use 001 here to make this this happens after the finalisation of
2943 # we use 001 here to make this this happens after the finalisation of
2944 # pending changelog write (using 000). Otherwise the two finalizer
2944 # pending changelog write (using 000). Otherwise the two finalizer
2945 # would step over each other and delete the changelog.i file.
2945 # would step over each other and delete the changelog.i file.
2946 if self.target[1] is not None:
2946 if self.target[1] is not None:
2947 callback_id = b'001-revlog-split-%d-%s' % self.target
2947 callback_id = b'001-revlog-split-%d-%s' % self.target
2948 else:
2948 else:
2949 callback_id = b'001-revlog-split-%d' % self.target[0]
2949 callback_id = b'001-revlog-split-%d' % self.target[0]
2950 tr.addfinalize(callback_id, finalize_callback)
2950 tr.addfinalize(callback_id, finalize_callback)
2951 tr.addabort(callback_id, abort_callback)
2951 tr.addabort(callback_id, abort_callback)
2952
2952
2953 self._format_flags &= ~FLAG_INLINE_DATA
2953 self._format_flags &= ~FLAG_INLINE_DATA
2954 self._inner.split_inline(
2954 self._inner.split_inline(
2955 tr,
2955 tr,
2956 self._format_flags | self._format_version,
2956 self._format_flags | self._format_version,
2957 new_index_file_path=new_index_file_path,
2957 new_index_file_path=new_index_file_path,
2958 )
2958 )
2959
2959
2960 self._inline = False
2960 self._inline = False
2961 if new_index_file_path is not None:
2961 if new_index_file_path is not None:
2962 self._indexfile = new_index_file_path
2962 self._indexfile = new_index_file_path
2963
2963
2964 nodemaputil.setup_persistent_nodemap(tr, self)
2964 nodemaputil.setup_persistent_nodemap(tr, self)
2965
2965
2966 def _nodeduplicatecallback(self, transaction, node):
2966 def _nodeduplicatecallback(self, transaction, node):
2967 """called when trying to add a node already stored."""
2967 """called when trying to add a node already stored."""
2968
2968
2969 @contextlib.contextmanager
2969 @contextlib.contextmanager
2970 def reading(self):
2970 def reading(self):
2971 with self._inner.reading():
2971 with self._inner.reading():
2972 yield
2972 yield
2973
2973
2974 @contextlib.contextmanager
2974 @contextlib.contextmanager
2975 def _writing(self, transaction):
2975 def _writing(self, transaction):
2976 if self._trypending:
2976 if self._trypending:
2977 msg = b'try to write in a `trypending` revlog: %s'
2977 msg = b'try to write in a `trypending` revlog: %s'
2978 msg %= self.display_id
2978 msg %= self.display_id
2979 raise error.ProgrammingError(msg)
2979 raise error.ProgrammingError(msg)
2980 if self._inner.is_writing:
2980 if self._inner.is_writing:
2981 yield
2981 yield
2982 else:
2982 else:
2983 data_end = None
2983 data_end = None
2984 sidedata_end = None
2984 sidedata_end = None
2985 if self._docket is not None:
2985 if self._docket is not None:
2986 data_end = self._docket.data_end
2986 data_end = self._docket.data_end
2987 sidedata_end = self._docket.sidedata_end
2987 sidedata_end = self._docket.sidedata_end
2988 with self._inner.writing(
2988 with self._inner.writing(
2989 transaction,
2989 transaction,
2990 data_end=data_end,
2990 data_end=data_end,
2991 sidedata_end=sidedata_end,
2991 sidedata_end=sidedata_end,
2992 ):
2992 ):
2993 yield
2993 yield
2994 if self._docket is not None:
2994 if self._docket is not None:
2995 self._write_docket(transaction)
2995 self._write_docket(transaction)
2996
2996
2997 @property
2997 @property
2998 def is_delaying(self):
2998 def is_delaying(self):
2999 return self._inner.is_delaying
2999 return self._inner.is_delaying
3000
3000
3001 def _write_docket(self, transaction):
3001 def _write_docket(self, transaction):
3002 """write the current docket on disk
3002 """write the current docket on disk
3003
3003
3004 Exist as a method to help changelog to implement transaction logic
3004 Exist as a method to help changelog to implement transaction logic
3005
3005
3006 We could also imagine using the same transaction logic for all revlog
3006 We could also imagine using the same transaction logic for all revlog
3007 since docket are cheap."""
3007 since docket are cheap."""
3008 self._docket.write(transaction)
3008 self._docket.write(transaction)
3009
3009
3010 def addrevision(
3010 def addrevision(
3011 self,
3011 self,
3012 text,
3012 text,
3013 transaction,
3013 transaction,
3014 link,
3014 link,
3015 p1,
3015 p1,
3016 p2,
3016 p2,
3017 cachedelta=None,
3017 cachedelta=None,
3018 node=None,
3018 node=None,
3019 flags=REVIDX_DEFAULT_FLAGS,
3019 flags=REVIDX_DEFAULT_FLAGS,
3020 deltacomputer=None,
3020 deltacomputer=None,
3021 sidedata=None,
3021 sidedata=None,
3022 ):
3022 ):
3023 """add a revision to the log
3023 """add a revision to the log
3024
3024
3025 text - the revision data to add
3025 text - the revision data to add
3026 transaction - the transaction object used for rollback
3026 transaction - the transaction object used for rollback
3027 link - the linkrev data to add
3027 link - the linkrev data to add
3028 p1, p2 - the parent nodeids of the revision
3028 p1, p2 - the parent nodeids of the revision
3029 cachedelta - an optional precomputed delta
3029 cachedelta - an optional precomputed delta
3030 node - nodeid of revision; typically node is not specified, and it is
3030 node - nodeid of revision; typically node is not specified, and it is
3031 computed by default as hash(text, p1, p2), however subclasses might
3031 computed by default as hash(text, p1, p2), however subclasses might
3032 use different hashing method (and override checkhash() in such case)
3032 use different hashing method (and override checkhash() in such case)
3033 flags - the known flags to set on the revision
3033 flags - the known flags to set on the revision
3034 deltacomputer - an optional deltacomputer instance shared between
3034 deltacomputer - an optional deltacomputer instance shared between
3035 multiple calls
3035 multiple calls
3036 """
3036 """
3037 if link == nullrev:
3037 if link == nullrev:
3038 raise error.RevlogError(
3038 raise error.RevlogError(
3039 _(b"attempted to add linkrev -1 to %s") % self.display_id
3039 _(b"attempted to add linkrev -1 to %s") % self.display_id
3040 )
3040 )
3041
3041
3042 if sidedata is None:
3042 if sidedata is None:
3043 sidedata = {}
3043 sidedata = {}
3044 elif sidedata and not self.feature_config.has_side_data:
3044 elif sidedata and not self.feature_config.has_side_data:
3045 raise error.ProgrammingError(
3045 raise error.ProgrammingError(
3046 _(b"trying to add sidedata to a revlog who don't support them")
3046 _(b"trying to add sidedata to a revlog who don't support them")
3047 )
3047 )
3048
3048
3049 if flags:
3049 if flags:
3050 node = node or self.hash(text, p1, p2)
3050 node = node or self.hash(text, p1, p2)
3051
3051
3052 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
3052 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
3053
3053
3054 # If the flag processor modifies the revision data, ignore any provided
3054 # If the flag processor modifies the revision data, ignore any provided
3055 # cachedelta.
3055 # cachedelta.
3056 if rawtext != text:
3056 if rawtext != text:
3057 cachedelta = None
3057 cachedelta = None
3058
3058
3059 if len(rawtext) > _maxentrysize:
3059 if len(rawtext) > _maxentrysize:
3060 raise error.RevlogError(
3060 raise error.RevlogError(
3061 _(
3061 _(
3062 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
3062 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
3063 )
3063 )
3064 % (self.display_id, len(rawtext))
3064 % (self.display_id, len(rawtext))
3065 )
3065 )
3066
3066
3067 node = node or self.hash(rawtext, p1, p2)
3067 node = node or self.hash(rawtext, p1, p2)
3068 rev = self.index.get_rev(node)
3068 rev = self.index.get_rev(node)
3069 if rev is not None:
3069 if rev is not None:
3070 return rev
3070 return rev
3071
3071
3072 if validatehash:
3072 if validatehash:
3073 self.checkhash(rawtext, node, p1=p1, p2=p2)
3073 self.checkhash(rawtext, node, p1=p1, p2=p2)
3074
3074
3075 return self.addrawrevision(
3075 return self.addrawrevision(
3076 rawtext,
3076 rawtext,
3077 transaction,
3077 transaction,
3078 link,
3078 link,
3079 p1,
3079 p1,
3080 p2,
3080 p2,
3081 node,
3081 node,
3082 flags,
3082 flags,
3083 cachedelta=cachedelta,
3083 cachedelta=cachedelta,
3084 deltacomputer=deltacomputer,
3084 deltacomputer=deltacomputer,
3085 sidedata=sidedata,
3085 sidedata=sidedata,
3086 )
3086 )
3087
3087
3088 def addrawrevision(
3088 def addrawrevision(
3089 self,
3089 self,
3090 rawtext,
3090 rawtext,
3091 transaction,
3091 transaction,
3092 link,
3092 link,
3093 p1,
3093 p1,
3094 p2,
3094 p2,
3095 node,
3095 node,
3096 flags,
3096 flags,
3097 cachedelta=None,
3097 cachedelta=None,
3098 deltacomputer=None,
3098 deltacomputer=None,
3099 sidedata=None,
3099 sidedata=None,
3100 ):
3100 ):
3101 """add a raw revision with known flags, node and parents
3101 """add a raw revision with known flags, node and parents
3102 useful when reusing a revision not stored in this revlog (ex: received
3102 useful when reusing a revision not stored in this revlog (ex: received
3103 over wire, or read from an external bundle).
3103 over wire, or read from an external bundle).
3104 """
3104 """
3105 with self._writing(transaction):
3105 with self._writing(transaction):
3106 return self._addrevision(
3106 return self._addrevision(
3107 node,
3107 node,
3108 rawtext,
3108 rawtext,
3109 transaction,
3109 transaction,
3110 link,
3110 link,
3111 p1,
3111 p1,
3112 p2,
3112 p2,
3113 flags,
3113 flags,
3114 cachedelta,
3114 cachedelta,
3115 deltacomputer=deltacomputer,
3115 deltacomputer=deltacomputer,
3116 sidedata=sidedata,
3116 sidedata=sidedata,
3117 )
3117 )
3118
3118
3119 def compress(self, data: bytes) -> Tuple[bytes, bytes]:
3119 def compress(self, data: bytes) -> Tuple[bytes, bytes]:
3120 return self._inner.compress(data)
3120 return self._inner.compress(data)
3121
3121
3122 def decompress(self, data):
3122 def decompress(self, data):
3123 return self._inner.decompress(data)
3123 return self._inner.decompress(data)
3124
3124
3125 def _addrevision(
3125 def _addrevision(
3126 self,
3126 self,
3127 node,
3127 node,
3128 rawtext,
3128 rawtext,
3129 transaction,
3129 transaction,
3130 link,
3130 link,
3131 p1,
3131 p1,
3132 p2,
3132 p2,
3133 flags,
3133 flags,
3134 cachedelta,
3134 cachedelta,
3135 alwayscache=False,
3135 alwayscache=False,
3136 deltacomputer=None,
3136 deltacomputer=None,
3137 sidedata=None,
3137 sidedata=None,
3138 ):
3138 ):
3139 """internal function to add revisions to the log
3139 """internal function to add revisions to the log
3140
3140
3141 see addrevision for argument descriptions.
3141 see addrevision for argument descriptions.
3142
3142
3143 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
3143 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
3144
3144
3145 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
3145 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
3146 be used.
3146 be used.
3147
3147
3148 invariants:
3148 invariants:
3149 - rawtext is optional (can be None); if not set, cachedelta must be set.
3149 - rawtext is optional (can be None); if not set, cachedelta must be set.
3150 if both are set, they must correspond to each other.
3150 if both are set, they must correspond to each other.
3151 """
3151 """
3152 if node == self.nullid:
3152 if node == self.nullid:
3153 raise error.RevlogError(
3153 raise error.RevlogError(
3154 _(b"%s: attempt to add null revision") % self.display_id
3154 _(b"%s: attempt to add null revision") % self.display_id
3155 )
3155 )
3156 if (
3156 if (
3157 node == self.nodeconstants.wdirid
3157 node == self.nodeconstants.wdirid
3158 or node in self.nodeconstants.wdirfilenodeids
3158 or node in self.nodeconstants.wdirfilenodeids
3159 ):
3159 ):
3160 raise error.RevlogError(
3160 raise error.RevlogError(
3161 _(b"%s: attempt to add wdir revision") % self.display_id
3161 _(b"%s: attempt to add wdir revision") % self.display_id
3162 )
3162 )
3163 if self._inner._writinghandles is None:
3163 if not self._inner.is_writing:
3164 msg = b'adding revision outside `revlog._writing` context'
3164 msg = b'adding revision outside `revlog._writing` context'
3165 raise error.ProgrammingError(msg)
3165 raise error.ProgrammingError(msg)
3166
3166
3167 btext = [rawtext]
3167 btext = [rawtext]
3168
3168
3169 curr = len(self)
3169 curr = len(self)
3170 prev = curr - 1
3170 prev = curr - 1
3171
3171
3172 offset = self._get_data_offset(prev)
3172 offset = self._get_data_offset(prev)
3173
3173
3174 if self._concurrencychecker:
3174 if self._concurrencychecker:
3175 ifh, dfh, sdfh = self._inner._writinghandles
3175 ifh, dfh, sdfh = self._inner._writinghandles
3176 # XXX no checking for the sidedata file
3176 # XXX no checking for the sidedata file
3177 if self._inline:
3177 if self._inline:
3178 # offset is "as if" it were in the .d file, so we need to add on
3178 # offset is "as if" it were in the .d file, so we need to add on
3179 # the size of the entry metadata.
3179 # the size of the entry metadata.
3180 self._concurrencychecker(
3180 self._concurrencychecker(
3181 ifh, self._indexfile, offset + curr * self.index.entry_size
3181 ifh, self._indexfile, offset + curr * self.index.entry_size
3182 )
3182 )
3183 else:
3183 else:
3184 # Entries in the .i are a consistent size.
3184 # Entries in the .i are a consistent size.
3185 self._concurrencychecker(
3185 self._concurrencychecker(
3186 ifh, self._indexfile, curr * self.index.entry_size
3186 ifh, self._indexfile, curr * self.index.entry_size
3187 )
3187 )
3188 self._concurrencychecker(dfh, self._datafile, offset)
3188 self._concurrencychecker(dfh, self._datafile, offset)
3189
3189
3190 p1r, p2r = self.rev(p1), self.rev(p2)
3190 p1r, p2r = self.rev(p1), self.rev(p2)
3191
3191
3192 # full versions are inserted when the needed deltas
3192 # full versions are inserted when the needed deltas
3193 # become comparable to the uncompressed text
3193 # become comparable to the uncompressed text
3194 if rawtext is None:
3194 if rawtext is None:
3195 # need rawtext size, before changed by flag processors, which is
3195 # need rawtext size, before changed by flag processors, which is
3196 # the non-raw size. use revlog explicitly to avoid filelog's extra
3196 # the non-raw size. use revlog explicitly to avoid filelog's extra
3197 # logic that might remove metadata size.
3197 # logic that might remove metadata size.
3198 textlen = mdiff.patchedsize(
3198 textlen = mdiff.patchedsize(
3199 revlog.size(self, cachedelta[0]), cachedelta[1]
3199 revlog.size(self, cachedelta[0]), cachedelta[1]
3200 )
3200 )
3201 else:
3201 else:
3202 textlen = len(rawtext)
3202 textlen = len(rawtext)
3203
3203
3204 if deltacomputer is None:
3204 if deltacomputer is None:
3205 write_debug = None
3205 write_debug = None
3206 if self.delta_config.debug_delta:
3206 if self.delta_config.debug_delta:
3207 write_debug = transaction._report
3207 write_debug = transaction._report
3208 deltacomputer = deltautil.deltacomputer(
3208 deltacomputer = deltautil.deltacomputer(
3209 self, write_debug=write_debug
3209 self, write_debug=write_debug
3210 )
3210 )
3211
3211
3212 if cachedelta is not None and len(cachedelta) == 2:
3212 if cachedelta is not None and len(cachedelta) == 2:
3213 # If the cached delta has no information about how it should be
3213 # If the cached delta has no information about how it should be
3214 # reused, add the default reuse instruction according to the
3214 # reused, add the default reuse instruction according to the
3215 # revlog's configuration.
3215 # revlog's configuration.
3216 if (
3216 if (
3217 self.delta_config.general_delta
3217 self.delta_config.general_delta
3218 and self.delta_config.lazy_delta_base
3218 and self.delta_config.lazy_delta_base
3219 ):
3219 ):
3220 delta_base_reuse = DELTA_BASE_REUSE_TRY
3220 delta_base_reuse = DELTA_BASE_REUSE_TRY
3221 else:
3221 else:
3222 delta_base_reuse = DELTA_BASE_REUSE_NO
3222 delta_base_reuse = DELTA_BASE_REUSE_NO
3223 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3223 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3224
3224
3225 revinfo = revlogutils.revisioninfo(
3225 revinfo = revlogutils.revisioninfo(
3226 node,
3226 node,
3227 p1,
3227 p1,
3228 p2,
3228 p2,
3229 btext,
3229 btext,
3230 textlen,
3230 textlen,
3231 cachedelta,
3231 cachedelta,
3232 flags,
3232 flags,
3233 )
3233 )
3234
3234
3235 deltainfo = deltacomputer.finddeltainfo(revinfo)
3235 deltainfo = deltacomputer.finddeltainfo(revinfo)
3236
3236
3237 compression_mode = COMP_MODE_INLINE
3237 compression_mode = COMP_MODE_INLINE
3238 if self._docket is not None:
3238 if self._docket is not None:
3239 default_comp = self._docket.default_compression_header
3239 default_comp = self._docket.default_compression_header
3240 r = deltautil.delta_compression(default_comp, deltainfo)
3240 r = deltautil.delta_compression(default_comp, deltainfo)
3241 compression_mode, deltainfo = r
3241 compression_mode, deltainfo = r
3242
3242
3243 sidedata_compression_mode = COMP_MODE_INLINE
3243 sidedata_compression_mode = COMP_MODE_INLINE
3244 if sidedata and self.feature_config.has_side_data:
3244 if sidedata and self.feature_config.has_side_data:
3245 sidedata_compression_mode = COMP_MODE_PLAIN
3245 sidedata_compression_mode = COMP_MODE_PLAIN
3246 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3246 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3247 sidedata_offset = self._docket.sidedata_end
3247 sidedata_offset = self._docket.sidedata_end
3248 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3248 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3249 if (
3249 if (
3250 h != b'u'
3250 h != b'u'
3251 and comp_sidedata[0:1] != b'\0'
3251 and comp_sidedata[0:1] != b'\0'
3252 and len(comp_sidedata) < len(serialized_sidedata)
3252 and len(comp_sidedata) < len(serialized_sidedata)
3253 ):
3253 ):
3254 assert not h
3254 assert not h
3255 if (
3255 if (
3256 comp_sidedata[0:1]
3256 comp_sidedata[0:1]
3257 == self._docket.default_compression_header
3257 == self._docket.default_compression_header
3258 ):
3258 ):
3259 sidedata_compression_mode = COMP_MODE_DEFAULT
3259 sidedata_compression_mode = COMP_MODE_DEFAULT
3260 serialized_sidedata = comp_sidedata
3260 serialized_sidedata = comp_sidedata
3261 else:
3261 else:
3262 sidedata_compression_mode = COMP_MODE_INLINE
3262 sidedata_compression_mode = COMP_MODE_INLINE
3263 serialized_sidedata = comp_sidedata
3263 serialized_sidedata = comp_sidedata
3264 else:
3264 else:
3265 serialized_sidedata = b""
3265 serialized_sidedata = b""
3266 # Don't store the offset if the sidedata is empty, that way
3266 # Don't store the offset if the sidedata is empty, that way
3267 # we can easily detect empty sidedata and they will be no different
3267 # we can easily detect empty sidedata and they will be no different
3268 # than ones we manually add.
3268 # than ones we manually add.
3269 sidedata_offset = 0
3269 sidedata_offset = 0
3270
3270
3271 rank = RANK_UNKNOWN
3271 rank = RANK_UNKNOWN
3272 if self.feature_config.compute_rank:
3272 if self.feature_config.compute_rank:
3273 if (p1r, p2r) == (nullrev, nullrev):
3273 if (p1r, p2r) == (nullrev, nullrev):
3274 rank = 1
3274 rank = 1
3275 elif p1r != nullrev and p2r == nullrev:
3275 elif p1r != nullrev and p2r == nullrev:
3276 rank = 1 + self.fast_rank(p1r)
3276 rank = 1 + self.fast_rank(p1r)
3277 elif p1r == nullrev and p2r != nullrev:
3277 elif p1r == nullrev and p2r != nullrev:
3278 rank = 1 + self.fast_rank(p2r)
3278 rank = 1 + self.fast_rank(p2r)
3279 else: # merge node
3279 else: # merge node
3280 if rustdagop is not None and self.index.rust_ext_compat:
3280 if rustdagop is not None and self.index.rust_ext_compat:
3281 rank = rustdagop.rank(self.index, p1r, p2r)
3281 rank = rustdagop.rank(self.index, p1r, p2r)
3282 else:
3282 else:
3283 pmin, pmax = sorted((p1r, p2r))
3283 pmin, pmax = sorted((p1r, p2r))
3284 rank = 1 + self.fast_rank(pmax)
3284 rank = 1 + self.fast_rank(pmax)
3285 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
3285 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
3286
3286
3287 e = revlogutils.entry(
3287 e = revlogutils.entry(
3288 flags=flags,
3288 flags=flags,
3289 data_offset=offset,
3289 data_offset=offset,
3290 data_compressed_length=deltainfo.deltalen,
3290 data_compressed_length=deltainfo.deltalen,
3291 data_uncompressed_length=textlen,
3291 data_uncompressed_length=textlen,
3292 data_compression_mode=compression_mode,
3292 data_compression_mode=compression_mode,
3293 data_delta_base=deltainfo.base,
3293 data_delta_base=deltainfo.base,
3294 link_rev=link,
3294 link_rev=link,
3295 parent_rev_1=p1r,
3295 parent_rev_1=p1r,
3296 parent_rev_2=p2r,
3296 parent_rev_2=p2r,
3297 node_id=node,
3297 node_id=node,
3298 sidedata_offset=sidedata_offset,
3298 sidedata_offset=sidedata_offset,
3299 sidedata_compressed_length=len(serialized_sidedata),
3299 sidedata_compressed_length=len(serialized_sidedata),
3300 sidedata_compression_mode=sidedata_compression_mode,
3300 sidedata_compression_mode=sidedata_compression_mode,
3301 rank=rank,
3301 rank=rank,
3302 )
3302 )
3303
3303
3304 self.index.append(e)
3304 self.index.append(e)
3305 entry = self.index.entry_binary(curr)
3305 entry = self.index.entry_binary(curr)
3306 if curr == 0 and self._docket is None:
3306 if curr == 0 and self._docket is None:
3307 header = self._format_flags | self._format_version
3307 header = self._format_flags | self._format_version
3308 header = self.index.pack_header(header)
3308 header = self.index.pack_header(header)
3309 entry = header + entry
3309 entry = header + entry
3310 self._writeentry(
3310 self._writeentry(
3311 transaction,
3311 transaction,
3312 entry,
3312 entry,
3313 deltainfo.data,
3313 deltainfo.data,
3314 link,
3314 link,
3315 offset,
3315 offset,
3316 serialized_sidedata,
3316 serialized_sidedata,
3317 sidedata_offset,
3317 sidedata_offset,
3318 )
3318 )
3319
3319
3320 rawtext = btext[0]
3320 rawtext = btext[0]
3321
3321
3322 if alwayscache and rawtext is None:
3322 if alwayscache and rawtext is None:
3323 rawtext = deltacomputer.buildtext(revinfo)
3323 rawtext = deltacomputer.buildtext(revinfo)
3324
3324
3325 if type(rawtext) == bytes: # only accept immutable objects
3325 if type(rawtext) == bytes: # only accept immutable objects
3326 self._inner._revisioncache = (node, curr, rawtext)
3326 self._inner._revisioncache = (node, curr, rawtext)
3327 self._chainbasecache[curr] = deltainfo.chainbase
3327 self._chainbasecache[curr] = deltainfo.chainbase
3328 return curr
3328 return curr
3329
3329
3330 def _get_data_offset(self, prev):
3330 def _get_data_offset(self, prev):
3331 """Returns the current offset in the (in-transaction) data file.
3331 """Returns the current offset in the (in-transaction) data file.
3332 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
3332 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
3333 file to store that information: since sidedata can be rewritten to the
3333 file to store that information: since sidedata can be rewritten to the
3334 end of the data file within a transaction, you can have cases where, for
3334 end of the data file within a transaction, you can have cases where, for
3335 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3335 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3336 to `n - 1`'s sidedata being written after `n`'s data.
3336 to `n - 1`'s sidedata being written after `n`'s data.
3337
3337
3338 TODO cache this in a docket file before getting out of experimental."""
3338 TODO cache this in a docket file before getting out of experimental."""
3339 if self._docket is None:
3339 if self._docket is None:
3340 return self.end(prev)
3340 return self.end(prev)
3341 else:
3341 else:
3342 return self._docket.data_end
3342 return self._docket.data_end
3343
3343
3344 def _writeentry(
3344 def _writeentry(
3345 self,
3345 self,
3346 transaction,
3346 transaction,
3347 entry,
3347 entry,
3348 data,
3348 data,
3349 link,
3349 link,
3350 offset,
3350 offset,
3351 sidedata,
3351 sidedata,
3352 sidedata_offset,
3352 sidedata_offset,
3353 ):
3353 ):
3354 # Files opened in a+ mode have inconsistent behavior on various
3354 # Files opened in a+ mode have inconsistent behavior on various
3355 # platforms. Windows requires that a file positioning call be made
3355 # platforms. Windows requires that a file positioning call be made
3356 # when the file handle transitions between reads and writes. See
3356 # when the file handle transitions between reads and writes. See
3357 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3357 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3358 # platforms, Python or the platform itself can be buggy. Some versions
3358 # platforms, Python or the platform itself can be buggy. Some versions
3359 # of Solaris have been observed to not append at the end of the file
3359 # of Solaris have been observed to not append at the end of the file
3360 # if the file was seeked to before the end. See issue4943 for more.
3360 # if the file was seeked to before the end. See issue4943 for more.
3361 #
3361 #
3362 # We work around this issue by inserting a seek() before writing.
3362 # We work around this issue by inserting a seek() before writing.
3363 # Note: This is likely not necessary on Python 3. However, because
3363 # Note: This is likely not necessary on Python 3. However, because
3364 # the file handle is reused for reads and may be seeked there, we need
3364 # the file handle is reused for reads and may be seeked there, we need
3365 # to be careful before changing this.
3365 # to be careful before changing this.
3366 index_end = data_end = sidedata_end = None
3366 index_end = data_end = sidedata_end = None
3367 if self._docket is not None:
3367 if self._docket is not None:
3368 index_end = self._docket.index_end
3368 index_end = self._docket.index_end
3369 data_end = self._docket.data_end
3369 data_end = self._docket.data_end
3370 sidedata_end = self._docket.sidedata_end
3370 sidedata_end = self._docket.sidedata_end
3371
3371
3372 files_end = self._inner.write_entry(
3372 files_end = self._inner.write_entry(
3373 transaction,
3373 transaction,
3374 entry,
3374 entry,
3375 data,
3375 data,
3376 link,
3376 link,
3377 offset,
3377 offset,
3378 sidedata,
3378 sidedata,
3379 sidedata_offset,
3379 sidedata_offset,
3380 index_end,
3380 index_end,
3381 data_end,
3381 data_end,
3382 sidedata_end,
3382 sidedata_end,
3383 )
3383 )
3384 self._enforceinlinesize(transaction)
3384 self._enforceinlinesize(transaction)
3385 if self._docket is not None:
3385 if self._docket is not None:
3386 self._docket.index_end = files_end[0]
3386 self._docket.index_end = files_end[0]
3387 self._docket.data_end = files_end[1]
3387 self._docket.data_end = files_end[1]
3388 self._docket.sidedata_end = files_end[2]
3388 self._docket.sidedata_end = files_end[2]
3389
3389
3390 nodemaputil.setup_persistent_nodemap(transaction, self)
3390 nodemaputil.setup_persistent_nodemap(transaction, self)
3391
3391
3392 def addgroup(
3392 def addgroup(
3393 self,
3393 self,
3394 deltas,
3394 deltas,
3395 linkmapper,
3395 linkmapper,
3396 transaction,
3396 transaction,
3397 alwayscache=False,
3397 alwayscache=False,
3398 addrevisioncb=None,
3398 addrevisioncb=None,
3399 duplicaterevisioncb=None,
3399 duplicaterevisioncb=None,
3400 debug_info=None,
3400 debug_info=None,
3401 delta_base_reuse_policy=None,
3401 delta_base_reuse_policy=None,
3402 ):
3402 ):
3403 """
3403 """
3404 add a delta group
3404 add a delta group
3405
3405
3406 given a set of deltas, add them to the revision log. the
3406 given a set of deltas, add them to the revision log. the
3407 first delta is against its parent, which should be in our
3407 first delta is against its parent, which should be in our
3408 log, the rest are against the previous delta.
3408 log, the rest are against the previous delta.
3409
3409
3410 If ``addrevisioncb`` is defined, it will be called with arguments of
3410 If ``addrevisioncb`` is defined, it will be called with arguments of
3411 this revlog and the node that was added.
3411 this revlog and the node that was added.
3412 """
3412 """
3413
3413
3414 if self._adding_group:
3414 if self._adding_group:
3415 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3415 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3416
3416
3417 # read the default delta-base reuse policy from revlog config if the
3417 # read the default delta-base reuse policy from revlog config if the
3418 # group did not specify one.
3418 # group did not specify one.
3419 if delta_base_reuse_policy is None:
3419 if delta_base_reuse_policy is None:
3420 if (
3420 if (
3421 self.delta_config.general_delta
3421 self.delta_config.general_delta
3422 and self.delta_config.lazy_delta_base
3422 and self.delta_config.lazy_delta_base
3423 ):
3423 ):
3424 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3424 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3425 else:
3425 else:
3426 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3426 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3427
3427
3428 self._adding_group = True
3428 self._adding_group = True
3429 empty = True
3429 empty = True
3430 try:
3430 try:
3431 with self._writing(transaction):
3431 with self._writing(transaction):
3432 write_debug = None
3432 write_debug = None
3433 if self.delta_config.debug_delta:
3433 if self.delta_config.debug_delta:
3434 write_debug = transaction._report
3434 write_debug = transaction._report
3435 deltacomputer = deltautil.deltacomputer(
3435 deltacomputer = deltautil.deltacomputer(
3436 self,
3436 self,
3437 write_debug=write_debug,
3437 write_debug=write_debug,
3438 debug_info=debug_info,
3438 debug_info=debug_info,
3439 )
3439 )
3440 # loop through our set of deltas
3440 # loop through our set of deltas
3441 for data in deltas:
3441 for data in deltas:
3442 (
3442 (
3443 node,
3443 node,
3444 p1,
3444 p1,
3445 p2,
3445 p2,
3446 linknode,
3446 linknode,
3447 deltabase,
3447 deltabase,
3448 delta,
3448 delta,
3449 flags,
3449 flags,
3450 sidedata,
3450 sidedata,
3451 ) = data
3451 ) = data
3452 link = linkmapper(linknode)
3452 link = linkmapper(linknode)
3453 flags = flags or REVIDX_DEFAULT_FLAGS
3453 flags = flags or REVIDX_DEFAULT_FLAGS
3454
3454
3455 rev = self.index.get_rev(node)
3455 rev = self.index.get_rev(node)
3456 if rev is not None:
3456 if rev is not None:
3457 # this can happen if two branches make the same change
3457 # this can happen if two branches make the same change
3458 self._nodeduplicatecallback(transaction, rev)
3458 self._nodeduplicatecallback(transaction, rev)
3459 if duplicaterevisioncb:
3459 if duplicaterevisioncb:
3460 duplicaterevisioncb(self, rev)
3460 duplicaterevisioncb(self, rev)
3461 empty = False
3461 empty = False
3462 continue
3462 continue
3463
3463
3464 for p in (p1, p2):
3464 for p in (p1, p2):
3465 if not self.index.has_node(p):
3465 if not self.index.has_node(p):
3466 raise error.LookupError(
3466 raise error.LookupError(
3467 p, self.radix, _(b'unknown parent')
3467 p, self.radix, _(b'unknown parent')
3468 )
3468 )
3469
3469
3470 if not self.index.has_node(deltabase):
3470 if not self.index.has_node(deltabase):
3471 raise error.LookupError(
3471 raise error.LookupError(
3472 deltabase, self.display_id, _(b'unknown delta base')
3472 deltabase, self.display_id, _(b'unknown delta base')
3473 )
3473 )
3474
3474
3475 baserev = self.rev(deltabase)
3475 baserev = self.rev(deltabase)
3476
3476
3477 if baserev != nullrev and self.iscensored(baserev):
3477 if baserev != nullrev and self.iscensored(baserev):
3478 # if base is censored, delta must be full replacement in a
3478 # if base is censored, delta must be full replacement in a
3479 # single patch operation
3479 # single patch operation
3480 hlen = struct.calcsize(b">lll")
3480 hlen = struct.calcsize(b">lll")
3481 oldlen = self.rawsize(baserev)
3481 oldlen = self.rawsize(baserev)
3482 newlen = len(delta) - hlen
3482 newlen = len(delta) - hlen
3483 if delta[:hlen] != mdiff.replacediffheader(
3483 if delta[:hlen] != mdiff.replacediffheader(
3484 oldlen, newlen
3484 oldlen, newlen
3485 ):
3485 ):
3486 raise error.CensoredBaseError(
3486 raise error.CensoredBaseError(
3487 self.display_id, self.node(baserev)
3487 self.display_id, self.node(baserev)
3488 )
3488 )
3489
3489
3490 if not flags and self._peek_iscensored(baserev, delta):
3490 if not flags and self._peek_iscensored(baserev, delta):
3491 flags |= REVIDX_ISCENSORED
3491 flags |= REVIDX_ISCENSORED
3492
3492
3493 # We assume consumers of addrevisioncb will want to retrieve
3493 # We assume consumers of addrevisioncb will want to retrieve
3494 # the added revision, which will require a call to
3494 # the added revision, which will require a call to
3495 # revision(). revision() will fast path if there is a cache
3495 # revision(). revision() will fast path if there is a cache
3496 # hit. So, we tell _addrevision() to always cache in this case.
3496 # hit. So, we tell _addrevision() to always cache in this case.
3497 # We're only using addgroup() in the context of changegroup
3497 # We're only using addgroup() in the context of changegroup
3498 # generation so the revision data can always be handled as raw
3498 # generation so the revision data can always be handled as raw
3499 # by the flagprocessor.
3499 # by the flagprocessor.
3500 rev = self._addrevision(
3500 rev = self._addrevision(
3501 node,
3501 node,
3502 None,
3502 None,
3503 transaction,
3503 transaction,
3504 link,
3504 link,
3505 p1,
3505 p1,
3506 p2,
3506 p2,
3507 flags,
3507 flags,
3508 (baserev, delta, delta_base_reuse_policy),
3508 (baserev, delta, delta_base_reuse_policy),
3509 alwayscache=alwayscache,
3509 alwayscache=alwayscache,
3510 deltacomputer=deltacomputer,
3510 deltacomputer=deltacomputer,
3511 sidedata=sidedata,
3511 sidedata=sidedata,
3512 )
3512 )
3513
3513
3514 if addrevisioncb:
3514 if addrevisioncb:
3515 addrevisioncb(self, rev)
3515 addrevisioncb(self, rev)
3516 empty = False
3516 empty = False
3517 finally:
3517 finally:
3518 self._adding_group = False
3518 self._adding_group = False
3519 return not empty
3519 return not empty
3520
3520
3521 def iscensored(self, rev):
3521 def iscensored(self, rev):
3522 """Check if a file revision is censored."""
3522 """Check if a file revision is censored."""
3523 if not self.feature_config.censorable:
3523 if not self.feature_config.censorable:
3524 return False
3524 return False
3525
3525
3526 return self.flags(rev) & REVIDX_ISCENSORED
3526 return self.flags(rev) & REVIDX_ISCENSORED
3527
3527
3528 def _peek_iscensored(self, baserev, delta):
3528 def _peek_iscensored(self, baserev, delta):
3529 """Quickly check if a delta produces a censored revision."""
3529 """Quickly check if a delta produces a censored revision."""
3530 if not self.feature_config.censorable:
3530 if not self.feature_config.censorable:
3531 return False
3531 return False
3532
3532
3533 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3533 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3534
3534
3535 def getstrippoint(self, minlink):
3535 def getstrippoint(self, minlink):
3536 """find the minimum rev that must be stripped to strip the linkrev
3536 """find the minimum rev that must be stripped to strip the linkrev
3537
3537
3538 Returns a tuple containing the minimum rev and a set of all revs that
3538 Returns a tuple containing the minimum rev and a set of all revs that
3539 have linkrevs that will be broken by this strip.
3539 have linkrevs that will be broken by this strip.
3540 """
3540 """
3541 return storageutil.resolvestripinfo(
3541 return storageutil.resolvestripinfo(
3542 minlink,
3542 minlink,
3543 len(self) - 1,
3543 len(self) - 1,
3544 self.headrevs(),
3544 self.headrevs(),
3545 self.linkrev,
3545 self.linkrev,
3546 self.parentrevs,
3546 self.parentrevs,
3547 )
3547 )
3548
3548
3549 def strip(self, minlink, transaction):
3549 def strip(self, minlink, transaction):
3550 """truncate the revlog on the first revision with a linkrev >= minlink
3550 """truncate the revlog on the first revision with a linkrev >= minlink
3551
3551
3552 This function is called when we're stripping revision minlink and
3552 This function is called when we're stripping revision minlink and
3553 its descendants from the repository.
3553 its descendants from the repository.
3554
3554
3555 We have to remove all revisions with linkrev >= minlink, because
3555 We have to remove all revisions with linkrev >= minlink, because
3556 the equivalent changelog revisions will be renumbered after the
3556 the equivalent changelog revisions will be renumbered after the
3557 strip.
3557 strip.
3558
3558
3559 So we truncate the revlog on the first of these revisions, and
3559 So we truncate the revlog on the first of these revisions, and
3560 trust that the caller has saved the revisions that shouldn't be
3560 trust that the caller has saved the revisions that shouldn't be
3561 removed and that it'll re-add them after this truncation.
3561 removed and that it'll re-add them after this truncation.
3562 """
3562 """
3563 if len(self) == 0:
3563 if len(self) == 0:
3564 return
3564 return
3565
3565
3566 rev, _ = self.getstrippoint(minlink)
3566 rev, _ = self.getstrippoint(minlink)
3567 if rev == len(self):
3567 if rev == len(self):
3568 return
3568 return
3569
3569
3570 # first truncate the files on disk
3570 # first truncate the files on disk
3571 data_end = self.start(rev)
3571 data_end = self.start(rev)
3572 if not self._inline:
3572 if not self._inline:
3573 transaction.add(self._datafile, data_end)
3573 transaction.add(self._datafile, data_end)
3574 end = rev * self.index.entry_size
3574 end = rev * self.index.entry_size
3575 else:
3575 else:
3576 end = data_end + (rev * self.index.entry_size)
3576 end = data_end + (rev * self.index.entry_size)
3577
3577
3578 if self._sidedatafile:
3578 if self._sidedatafile:
3579 sidedata_end = self.sidedata_cut_off(rev)
3579 sidedata_end = self.sidedata_cut_off(rev)
3580 transaction.add(self._sidedatafile, sidedata_end)
3580 transaction.add(self._sidedatafile, sidedata_end)
3581
3581
3582 transaction.add(self._indexfile, end)
3582 transaction.add(self._indexfile, end)
3583 if self._docket is not None:
3583 if self._docket is not None:
3584 # XXX we could, leverage the docket while stripping. However it is
3584 # XXX we could, leverage the docket while stripping. However it is
3585 # not powerfull enough at the time of this comment
3585 # not powerfull enough at the time of this comment
3586 self._docket.index_end = end
3586 self._docket.index_end = end
3587 self._docket.data_end = data_end
3587 self._docket.data_end = data_end
3588 self._docket.sidedata_end = sidedata_end
3588 self._docket.sidedata_end = sidedata_end
3589 self._docket.write(transaction, stripping=True)
3589 self._docket.write(transaction, stripping=True)
3590
3590
3591 # then reset internal state in memory to forget those revisions
3591 # then reset internal state in memory to forget those revisions
3592 self._chaininfocache = util.lrucachedict(500)
3592 self._chaininfocache = util.lrucachedict(500)
3593 self._inner.clear_cache()
3593 self._inner.clear_cache()
3594
3594
3595 del self.index[rev:-1]
3595 del self.index[rev:-1]
3596
3596
3597 def checksize(self):
3597 def checksize(self):
3598 """Check size of index and data files
3598 """Check size of index and data files
3599
3599
3600 return a (dd, di) tuple.
3600 return a (dd, di) tuple.
3601 - dd: extra bytes for the "data" file
3601 - dd: extra bytes for the "data" file
3602 - di: extra bytes for the "index" file
3602 - di: extra bytes for the "index" file
3603
3603
3604 A healthy revlog will return (0, 0).
3604 A healthy revlog will return (0, 0).
3605 """
3605 """
3606 expected = 0
3606 expected = 0
3607 if len(self):
3607 if len(self):
3608 expected = max(0, self.end(len(self) - 1))
3608 expected = max(0, self.end(len(self) - 1))
3609
3609
3610 try:
3610 try:
3611 with self._datafp() as f:
3611 with self._datafp() as f:
3612 f.seek(0, io.SEEK_END)
3612 f.seek(0, io.SEEK_END)
3613 actual = f.tell()
3613 actual = f.tell()
3614 dd = actual - expected
3614 dd = actual - expected
3615 except FileNotFoundError:
3615 except FileNotFoundError:
3616 dd = 0
3616 dd = 0
3617
3617
3618 try:
3618 try:
3619 f = self.opener(self._indexfile)
3619 f = self.opener(self._indexfile)
3620 f.seek(0, io.SEEK_END)
3620 f.seek(0, io.SEEK_END)
3621 actual = f.tell()
3621 actual = f.tell()
3622 f.close()
3622 f.close()
3623 s = self.index.entry_size
3623 s = self.index.entry_size
3624 i = max(0, actual // s)
3624 i = max(0, actual // s)
3625 di = actual - (i * s)
3625 di = actual - (i * s)
3626 if self._inline:
3626 if self._inline:
3627 databytes = 0
3627 databytes = 0
3628 for r in self:
3628 for r in self:
3629 databytes += max(0, self.length(r))
3629 databytes += max(0, self.length(r))
3630 dd = 0
3630 dd = 0
3631 di = actual - len(self) * s - databytes
3631 di = actual - len(self) * s - databytes
3632 except FileNotFoundError:
3632 except FileNotFoundError:
3633 di = 0
3633 di = 0
3634
3634
3635 return (dd, di)
3635 return (dd, di)
3636
3636
3637 def files(self):
3637 def files(self):
3638 """return list of files that compose this revlog"""
3638 """return list of files that compose this revlog"""
3639 res = [self._indexfile]
3639 res = [self._indexfile]
3640 if self._docket_file is None:
3640 if self._docket_file is None:
3641 if not self._inline:
3641 if not self._inline:
3642 res.append(self._datafile)
3642 res.append(self._datafile)
3643 else:
3643 else:
3644 res.append(self._docket_file)
3644 res.append(self._docket_file)
3645 res.extend(self._docket.old_index_filepaths(include_empty=False))
3645 res.extend(self._docket.old_index_filepaths(include_empty=False))
3646 if self._docket.data_end:
3646 if self._docket.data_end:
3647 res.append(self._datafile)
3647 res.append(self._datafile)
3648 res.extend(self._docket.old_data_filepaths(include_empty=False))
3648 res.extend(self._docket.old_data_filepaths(include_empty=False))
3649 if self._docket.sidedata_end:
3649 if self._docket.sidedata_end:
3650 res.append(self._sidedatafile)
3650 res.append(self._sidedatafile)
3651 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3651 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3652 return res
3652 return res
3653
3653
3654 def emitrevisions(
3654 def emitrevisions(
3655 self,
3655 self,
3656 nodes,
3656 nodes,
3657 nodesorder=None,
3657 nodesorder=None,
3658 revisiondata=False,
3658 revisiondata=False,
3659 assumehaveparentrevisions=False,
3659 assumehaveparentrevisions=False,
3660 deltamode=repository.CG_DELTAMODE_STD,
3660 deltamode=repository.CG_DELTAMODE_STD,
3661 sidedata_helpers=None,
3661 sidedata_helpers=None,
3662 debug_info=None,
3662 debug_info=None,
3663 ):
3663 ):
3664 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3664 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3665 raise error.ProgrammingError(
3665 raise error.ProgrammingError(
3666 b'unhandled value for nodesorder: %s' % nodesorder
3666 b'unhandled value for nodesorder: %s' % nodesorder
3667 )
3667 )
3668
3668
3669 if nodesorder is None and not self.delta_config.general_delta:
3669 if nodesorder is None and not self.delta_config.general_delta:
3670 nodesorder = b'storage'
3670 nodesorder = b'storage'
3671
3671
3672 if (
3672 if (
3673 not self._storedeltachains
3673 not self._storedeltachains
3674 and deltamode != repository.CG_DELTAMODE_PREV
3674 and deltamode != repository.CG_DELTAMODE_PREV
3675 ):
3675 ):
3676 deltamode = repository.CG_DELTAMODE_FULL
3676 deltamode = repository.CG_DELTAMODE_FULL
3677
3677
3678 return storageutil.emitrevisions(
3678 return storageutil.emitrevisions(
3679 self,
3679 self,
3680 nodes,
3680 nodes,
3681 nodesorder,
3681 nodesorder,
3682 revlogrevisiondelta,
3682 revlogrevisiondelta,
3683 deltaparentfn=self.deltaparent,
3683 deltaparentfn=self.deltaparent,
3684 candeltafn=self._candelta,
3684 candeltafn=self._candelta,
3685 rawsizefn=self.rawsize,
3685 rawsizefn=self.rawsize,
3686 revdifffn=self.revdiff,
3686 revdifffn=self.revdiff,
3687 flagsfn=self.flags,
3687 flagsfn=self.flags,
3688 deltamode=deltamode,
3688 deltamode=deltamode,
3689 revisiondata=revisiondata,
3689 revisiondata=revisiondata,
3690 assumehaveparentrevisions=assumehaveparentrevisions,
3690 assumehaveparentrevisions=assumehaveparentrevisions,
3691 sidedata_helpers=sidedata_helpers,
3691 sidedata_helpers=sidedata_helpers,
3692 debug_info=debug_info,
3692 debug_info=debug_info,
3693 )
3693 )
3694
3694
3695 DELTAREUSEALWAYS = b'always'
3695 DELTAREUSEALWAYS = b'always'
3696 DELTAREUSESAMEREVS = b'samerevs'
3696 DELTAREUSESAMEREVS = b'samerevs'
3697 DELTAREUSENEVER = b'never'
3697 DELTAREUSENEVER = b'never'
3698
3698
3699 DELTAREUSEFULLADD = b'fulladd'
3699 DELTAREUSEFULLADD = b'fulladd'
3700
3700
3701 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3701 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3702
3702
3703 def clone(
3703 def clone(
3704 self,
3704 self,
3705 tr,
3705 tr,
3706 destrevlog,
3706 destrevlog,
3707 addrevisioncb=None,
3707 addrevisioncb=None,
3708 deltareuse=DELTAREUSESAMEREVS,
3708 deltareuse=DELTAREUSESAMEREVS,
3709 forcedeltabothparents=None,
3709 forcedeltabothparents=None,
3710 sidedata_helpers=None,
3710 sidedata_helpers=None,
3711 ):
3711 ):
3712 """Copy this revlog to another, possibly with format changes.
3712 """Copy this revlog to another, possibly with format changes.
3713
3713
3714 The destination revlog will contain the same revisions and nodes.
3714 The destination revlog will contain the same revisions and nodes.
3715 However, it may not be bit-for-bit identical due to e.g. delta encoding
3715 However, it may not be bit-for-bit identical due to e.g. delta encoding
3716 differences.
3716 differences.
3717
3717
3718 The ``deltareuse`` argument control how deltas from the existing revlog
3718 The ``deltareuse`` argument control how deltas from the existing revlog
3719 are preserved in the destination revlog. The argument can have the
3719 are preserved in the destination revlog. The argument can have the
3720 following values:
3720 following values:
3721
3721
3722 DELTAREUSEALWAYS
3722 DELTAREUSEALWAYS
3723 Deltas will always be reused (if possible), even if the destination
3723 Deltas will always be reused (if possible), even if the destination
3724 revlog would not select the same revisions for the delta. This is the
3724 revlog would not select the same revisions for the delta. This is the
3725 fastest mode of operation.
3725 fastest mode of operation.
3726 DELTAREUSESAMEREVS
3726 DELTAREUSESAMEREVS
3727 Deltas will be reused if the destination revlog would pick the same
3727 Deltas will be reused if the destination revlog would pick the same
3728 revisions for the delta. This mode strikes a balance between speed
3728 revisions for the delta. This mode strikes a balance between speed
3729 and optimization.
3729 and optimization.
3730 DELTAREUSENEVER
3730 DELTAREUSENEVER
3731 Deltas will never be reused. This is the slowest mode of execution.
3731 Deltas will never be reused. This is the slowest mode of execution.
3732 This mode can be used to recompute deltas (e.g. if the diff/delta
3732 This mode can be used to recompute deltas (e.g. if the diff/delta
3733 algorithm changes).
3733 algorithm changes).
3734 DELTAREUSEFULLADD
3734 DELTAREUSEFULLADD
3735 Revision will be re-added as if their were new content. This is
3735 Revision will be re-added as if their were new content. This is
3736 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
3736 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
3737 eg: large file detection and handling.
3737 eg: large file detection and handling.
3738
3738
3739 Delta computation can be slow, so the choice of delta reuse policy can
3739 Delta computation can be slow, so the choice of delta reuse policy can
3740 significantly affect run time.
3740 significantly affect run time.
3741
3741
3742 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3742 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3743 two extremes. Deltas will be reused if they are appropriate. But if the
3743 two extremes. Deltas will be reused if they are appropriate. But if the
3744 delta could choose a better revision, it will do so. This means if you
3744 delta could choose a better revision, it will do so. This means if you
3745 are converting a non-generaldelta revlog to a generaldelta revlog,
3745 are converting a non-generaldelta revlog to a generaldelta revlog,
3746 deltas will be recomputed if the delta's parent isn't a parent of the
3746 deltas will be recomputed if the delta's parent isn't a parent of the
3747 revision.
3747 revision.
3748
3748
3749 In addition to the delta policy, the ``forcedeltabothparents``
3749 In addition to the delta policy, the ``forcedeltabothparents``
3750 argument controls whether to force compute deltas against both parents
3750 argument controls whether to force compute deltas against both parents
3751 for merges. By default, the current default is used.
3751 for merges. By default, the current default is used.
3752
3752
3753 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3753 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3754 `sidedata_helpers`.
3754 `sidedata_helpers`.
3755 """
3755 """
3756 if deltareuse not in self.DELTAREUSEALL:
3756 if deltareuse not in self.DELTAREUSEALL:
3757 raise ValueError(
3757 raise ValueError(
3758 _(b'value for deltareuse invalid: %s') % deltareuse
3758 _(b'value for deltareuse invalid: %s') % deltareuse
3759 )
3759 )
3760
3760
3761 if len(destrevlog):
3761 if len(destrevlog):
3762 raise ValueError(_(b'destination revlog is not empty'))
3762 raise ValueError(_(b'destination revlog is not empty'))
3763
3763
3764 if getattr(self, 'filteredrevs', None):
3764 if getattr(self, 'filteredrevs', None):
3765 raise ValueError(_(b'source revlog has filtered revisions'))
3765 raise ValueError(_(b'source revlog has filtered revisions'))
3766 if getattr(destrevlog, 'filteredrevs', None):
3766 if getattr(destrevlog, 'filteredrevs', None):
3767 raise ValueError(_(b'destination revlog has filtered revisions'))
3767 raise ValueError(_(b'destination revlog has filtered revisions'))
3768
3768
3769 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3769 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3770 # if possible.
3770 # if possible.
3771 old_delta_config = destrevlog.delta_config
3771 old_delta_config = destrevlog.delta_config
3772 destrevlog.delta_config = destrevlog.delta_config.copy()
3772 destrevlog.delta_config = destrevlog.delta_config.copy()
3773
3773
3774 try:
3774 try:
3775 if deltareuse == self.DELTAREUSEALWAYS:
3775 if deltareuse == self.DELTAREUSEALWAYS:
3776 destrevlog.delta_config.lazy_delta_base = True
3776 destrevlog.delta_config.lazy_delta_base = True
3777 destrevlog.delta_config.lazy_delta = True
3777 destrevlog.delta_config.lazy_delta = True
3778 elif deltareuse == self.DELTAREUSESAMEREVS:
3778 elif deltareuse == self.DELTAREUSESAMEREVS:
3779 destrevlog.delta_config.lazy_delta_base = False
3779 destrevlog.delta_config.lazy_delta_base = False
3780 destrevlog.delta_config.lazy_delta = True
3780 destrevlog.delta_config.lazy_delta = True
3781 elif deltareuse == self.DELTAREUSENEVER:
3781 elif deltareuse == self.DELTAREUSENEVER:
3782 destrevlog.delta_config.lazy_delta_base = False
3782 destrevlog.delta_config.lazy_delta_base = False
3783 destrevlog.delta_config.lazy_delta = False
3783 destrevlog.delta_config.lazy_delta = False
3784
3784
3785 delta_both_parents = (
3785 delta_both_parents = (
3786 forcedeltabothparents or old_delta_config.delta_both_parents
3786 forcedeltabothparents or old_delta_config.delta_both_parents
3787 )
3787 )
3788 destrevlog.delta_config.delta_both_parents = delta_both_parents
3788 destrevlog.delta_config.delta_both_parents = delta_both_parents
3789
3789
3790 with self.reading(), destrevlog._writing(tr):
3790 with self.reading(), destrevlog._writing(tr):
3791 self._clone(
3791 self._clone(
3792 tr,
3792 tr,
3793 destrevlog,
3793 destrevlog,
3794 addrevisioncb,
3794 addrevisioncb,
3795 deltareuse,
3795 deltareuse,
3796 forcedeltabothparents,
3796 forcedeltabothparents,
3797 sidedata_helpers,
3797 sidedata_helpers,
3798 )
3798 )
3799
3799
3800 finally:
3800 finally:
3801 destrevlog.delta_config = old_delta_config
3801 destrevlog.delta_config = old_delta_config
3802
3802
3803 def _clone(
3803 def _clone(
3804 self,
3804 self,
3805 tr,
3805 tr,
3806 destrevlog,
3806 destrevlog,
3807 addrevisioncb,
3807 addrevisioncb,
3808 deltareuse,
3808 deltareuse,
3809 forcedeltabothparents,
3809 forcedeltabothparents,
3810 sidedata_helpers,
3810 sidedata_helpers,
3811 ):
3811 ):
3812 """perform the core duty of `revlog.clone` after parameter processing"""
3812 """perform the core duty of `revlog.clone` after parameter processing"""
3813 write_debug = None
3813 write_debug = None
3814 if self.delta_config.debug_delta:
3814 if self.delta_config.debug_delta:
3815 write_debug = tr._report
3815 write_debug = tr._report
3816 deltacomputer = deltautil.deltacomputer(
3816 deltacomputer = deltautil.deltacomputer(
3817 destrevlog,
3817 destrevlog,
3818 write_debug=write_debug,
3818 write_debug=write_debug,
3819 )
3819 )
3820 index = self.index
3820 index = self.index
3821 for rev in self:
3821 for rev in self:
3822 entry = index[rev]
3822 entry = index[rev]
3823
3823
3824 # Some classes override linkrev to take filtered revs into
3824 # Some classes override linkrev to take filtered revs into
3825 # account. Use raw entry from index.
3825 # account. Use raw entry from index.
3826 flags = entry[0] & 0xFFFF
3826 flags = entry[0] & 0xFFFF
3827 linkrev = entry[4]
3827 linkrev = entry[4]
3828 p1 = index[entry[5]][7]
3828 p1 = index[entry[5]][7]
3829 p2 = index[entry[6]][7]
3829 p2 = index[entry[6]][7]
3830 node = entry[7]
3830 node = entry[7]
3831
3831
3832 # (Possibly) reuse the delta from the revlog if allowed and
3832 # (Possibly) reuse the delta from the revlog if allowed and
3833 # the revlog chunk is a delta.
3833 # the revlog chunk is a delta.
3834 cachedelta = None
3834 cachedelta = None
3835 rawtext = None
3835 rawtext = None
3836 if deltareuse == self.DELTAREUSEFULLADD:
3836 if deltareuse == self.DELTAREUSEFULLADD:
3837 text = self._revisiondata(rev)
3837 text = self._revisiondata(rev)
3838 sidedata = self.sidedata(rev)
3838 sidedata = self.sidedata(rev)
3839
3839
3840 if sidedata_helpers is not None:
3840 if sidedata_helpers is not None:
3841 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3841 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3842 self, sidedata_helpers, sidedata, rev
3842 self, sidedata_helpers, sidedata, rev
3843 )
3843 )
3844 flags = flags | new_flags[0] & ~new_flags[1]
3844 flags = flags | new_flags[0] & ~new_flags[1]
3845
3845
3846 destrevlog.addrevision(
3846 destrevlog.addrevision(
3847 text,
3847 text,
3848 tr,
3848 tr,
3849 linkrev,
3849 linkrev,
3850 p1,
3850 p1,
3851 p2,
3851 p2,
3852 cachedelta=cachedelta,
3852 cachedelta=cachedelta,
3853 node=node,
3853 node=node,
3854 flags=flags,
3854 flags=flags,
3855 deltacomputer=deltacomputer,
3855 deltacomputer=deltacomputer,
3856 sidedata=sidedata,
3856 sidedata=sidedata,
3857 )
3857 )
3858 else:
3858 else:
3859 if destrevlog.delta_config.lazy_delta:
3859 if destrevlog.delta_config.lazy_delta:
3860 dp = self.deltaparent(rev)
3860 dp = self.deltaparent(rev)
3861 if dp != nullrev:
3861 if dp != nullrev:
3862 cachedelta = (dp, bytes(self._inner._chunk(rev)))
3862 cachedelta = (dp, bytes(self._inner._chunk(rev)))
3863
3863
3864 sidedata = None
3864 sidedata = None
3865 if not cachedelta:
3865 if not cachedelta:
3866 try:
3866 try:
3867 rawtext = self._revisiondata(rev)
3867 rawtext = self._revisiondata(rev)
3868 except error.CensoredNodeError as censored:
3868 except error.CensoredNodeError as censored:
3869 assert flags & REVIDX_ISCENSORED
3869 assert flags & REVIDX_ISCENSORED
3870 rawtext = censored.tombstone
3870 rawtext = censored.tombstone
3871 sidedata = self.sidedata(rev)
3871 sidedata = self.sidedata(rev)
3872 if sidedata is None:
3872 if sidedata is None:
3873 sidedata = self.sidedata(rev)
3873 sidedata = self.sidedata(rev)
3874
3874
3875 if sidedata_helpers is not None:
3875 if sidedata_helpers is not None:
3876 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3876 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3877 self, sidedata_helpers, sidedata, rev
3877 self, sidedata_helpers, sidedata, rev
3878 )
3878 )
3879 flags = flags | new_flags[0] & ~new_flags[1]
3879 flags = flags | new_flags[0] & ~new_flags[1]
3880
3880
3881 destrevlog._addrevision(
3881 destrevlog._addrevision(
3882 node,
3882 node,
3883 rawtext,
3883 rawtext,
3884 tr,
3884 tr,
3885 linkrev,
3885 linkrev,
3886 p1,
3886 p1,
3887 p2,
3887 p2,
3888 flags,
3888 flags,
3889 cachedelta,
3889 cachedelta,
3890 deltacomputer=deltacomputer,
3890 deltacomputer=deltacomputer,
3891 sidedata=sidedata,
3891 sidedata=sidedata,
3892 )
3892 )
3893
3893
3894 if addrevisioncb:
3894 if addrevisioncb:
3895 addrevisioncb(self, rev, node)
3895 addrevisioncb(self, rev, node)
3896
3896
3897 def censorrevision(self, tr, censor_nodes, tombstone=b''):
3897 def censorrevision(self, tr, censor_nodes, tombstone=b''):
3898 if self._format_version == REVLOGV0:
3898 if self._format_version == REVLOGV0:
3899 raise error.RevlogError(
3899 raise error.RevlogError(
3900 _(b'cannot censor with version %d revlogs')
3900 _(b'cannot censor with version %d revlogs')
3901 % self._format_version
3901 % self._format_version
3902 )
3902 )
3903 elif self._format_version == REVLOGV1:
3903 elif self._format_version == REVLOGV1:
3904 rewrite.v1_censor(self, tr, censor_nodes, tombstone)
3904 rewrite.v1_censor(self, tr, censor_nodes, tombstone)
3905 else:
3905 else:
3906 rewrite.v2_censor(self, tr, censor_nodes, tombstone)
3906 rewrite.v2_censor(self, tr, censor_nodes, tombstone)
3907
3907
3908 def verifyintegrity(self, state) -> Iterable[RevLogProblem]:
3908 def verifyintegrity(self, state) -> Iterable[RevLogProblem]:
3909 """Verifies the integrity of the revlog.
3909 """Verifies the integrity of the revlog.
3910
3910
3911 Yields ``revlogproblem`` instances describing problems that are
3911 Yields ``revlogproblem`` instances describing problems that are
3912 found.
3912 found.
3913 """
3913 """
3914 dd, di = self.checksize()
3914 dd, di = self.checksize()
3915 if dd:
3915 if dd:
3916 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3916 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3917 if di:
3917 if di:
3918 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3918 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3919
3919
3920 version = self._format_version
3920 version = self._format_version
3921
3921
3922 # The verifier tells us what version revlog we should be.
3922 # The verifier tells us what version revlog we should be.
3923 if version != state[b'expectedversion']:
3923 if version != state[b'expectedversion']:
3924 yield revlogproblem(
3924 yield revlogproblem(
3925 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3925 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3926 % (self.display_id, version, state[b'expectedversion'])
3926 % (self.display_id, version, state[b'expectedversion'])
3927 )
3927 )
3928
3928
3929 state[b'skipread'] = set()
3929 state[b'skipread'] = set()
3930 state[b'safe_renamed'] = set()
3930 state[b'safe_renamed'] = set()
3931
3931
3932 for rev in self:
3932 for rev in self:
3933 node = self.node(rev)
3933 node = self.node(rev)
3934
3934
3935 # Verify contents. 4 cases to care about:
3935 # Verify contents. 4 cases to care about:
3936 #
3936 #
3937 # common: the most common case
3937 # common: the most common case
3938 # rename: with a rename
3938 # rename: with a rename
3939 # meta: file content starts with b'\1\n', the metadata
3939 # meta: file content starts with b'\1\n', the metadata
3940 # header defined in filelog.py, but without a rename
3940 # header defined in filelog.py, but without a rename
3941 # ext: content stored externally
3941 # ext: content stored externally
3942 #
3942 #
3943 # More formally, their differences are shown below:
3943 # More formally, their differences are shown below:
3944 #
3944 #
3945 # | common | rename | meta | ext
3945 # | common | rename | meta | ext
3946 # -------------------------------------------------------
3946 # -------------------------------------------------------
3947 # flags() | 0 | 0 | 0 | not 0
3947 # flags() | 0 | 0 | 0 | not 0
3948 # renamed() | False | True | False | ?
3948 # renamed() | False | True | False | ?
3949 # rawtext[0:2]=='\1\n'| False | True | True | ?
3949 # rawtext[0:2]=='\1\n'| False | True | True | ?
3950 #
3950 #
3951 # "rawtext" means the raw text stored in revlog data, which
3951 # "rawtext" means the raw text stored in revlog data, which
3952 # could be retrieved by "rawdata(rev)". "text"
3952 # could be retrieved by "rawdata(rev)". "text"
3953 # mentioned below is "revision(rev)".
3953 # mentioned below is "revision(rev)".
3954 #
3954 #
3955 # There are 3 different lengths stored physically:
3955 # There are 3 different lengths stored physically:
3956 # 1. L1: rawsize, stored in revlog index
3956 # 1. L1: rawsize, stored in revlog index
3957 # 2. L2: len(rawtext), stored in revlog data
3957 # 2. L2: len(rawtext), stored in revlog data
3958 # 3. L3: len(text), stored in revlog data if flags==0, or
3958 # 3. L3: len(text), stored in revlog data if flags==0, or
3959 # possibly somewhere else if flags!=0
3959 # possibly somewhere else if flags!=0
3960 #
3960 #
3961 # L1 should be equal to L2. L3 could be different from them.
3961 # L1 should be equal to L2. L3 could be different from them.
3962 # "text" may or may not affect commit hash depending on flag
3962 # "text" may or may not affect commit hash depending on flag
3963 # processors (see flagutil.addflagprocessor).
3963 # processors (see flagutil.addflagprocessor).
3964 #
3964 #
3965 # | common | rename | meta | ext
3965 # | common | rename | meta | ext
3966 # -------------------------------------------------
3966 # -------------------------------------------------
3967 # rawsize() | L1 | L1 | L1 | L1
3967 # rawsize() | L1 | L1 | L1 | L1
3968 # size() | L1 | L2-LM | L1(*) | L1 (?)
3968 # size() | L1 | L2-LM | L1(*) | L1 (?)
3969 # len(rawtext) | L2 | L2 | L2 | L2
3969 # len(rawtext) | L2 | L2 | L2 | L2
3970 # len(text) | L2 | L2 | L2 | L3
3970 # len(text) | L2 | L2 | L2 | L3
3971 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3971 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3972 #
3972 #
3973 # LM: length of metadata, depending on rawtext
3973 # LM: length of metadata, depending on rawtext
3974 # (*): not ideal, see comment in filelog.size
3974 # (*): not ideal, see comment in filelog.size
3975 # (?): could be "- len(meta)" if the resolved content has
3975 # (?): could be "- len(meta)" if the resolved content has
3976 # rename metadata
3976 # rename metadata
3977 #
3977 #
3978 # Checks needed to be done:
3978 # Checks needed to be done:
3979 # 1. length check: L1 == L2, in all cases.
3979 # 1. length check: L1 == L2, in all cases.
3980 # 2. hash check: depending on flag processor, we may need to
3980 # 2. hash check: depending on flag processor, we may need to
3981 # use either "text" (external), or "rawtext" (in revlog).
3981 # use either "text" (external), or "rawtext" (in revlog).
3982
3982
3983 try:
3983 try:
3984 skipflags = state.get(b'skipflags', 0)
3984 skipflags = state.get(b'skipflags', 0)
3985 if skipflags:
3985 if skipflags:
3986 skipflags &= self.flags(rev)
3986 skipflags &= self.flags(rev)
3987
3987
3988 _verify_revision(self, skipflags, state, node)
3988 _verify_revision(self, skipflags, state, node)
3989
3989
3990 l1 = self.rawsize(rev)
3990 l1 = self.rawsize(rev)
3991 l2 = len(self.rawdata(node))
3991 l2 = len(self.rawdata(node))
3992
3992
3993 if l1 != l2:
3993 if l1 != l2:
3994 yield revlogproblem(
3994 yield revlogproblem(
3995 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3995 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3996 node=node,
3996 node=node,
3997 )
3997 )
3998
3998
3999 except error.CensoredNodeError:
3999 except error.CensoredNodeError:
4000 if state[b'erroroncensored']:
4000 if state[b'erroroncensored']:
4001 yield revlogproblem(
4001 yield revlogproblem(
4002 error=_(b'censored file data'), node=node
4002 error=_(b'censored file data'), node=node
4003 )
4003 )
4004 state[b'skipread'].add(node)
4004 state[b'skipread'].add(node)
4005 except Exception as e:
4005 except Exception as e:
4006 yield revlogproblem(
4006 yield revlogproblem(
4007 error=_(b'unpacking %s: %s')
4007 error=_(b'unpacking %s: %s')
4008 % (short(node), stringutil.forcebytestr(e)),
4008 % (short(node), stringutil.forcebytestr(e)),
4009 node=node,
4009 node=node,
4010 )
4010 )
4011 state[b'skipread'].add(node)
4011 state[b'skipread'].add(node)
4012
4012
4013 def storageinfo(
4013 def storageinfo(
4014 self,
4014 self,
4015 exclusivefiles=False,
4015 exclusivefiles=False,
4016 sharedfiles=False,
4016 sharedfiles=False,
4017 revisionscount=False,
4017 revisionscount=False,
4018 trackedsize=False,
4018 trackedsize=False,
4019 storedsize=False,
4019 storedsize=False,
4020 ):
4020 ):
4021 d = {}
4021 d = {}
4022
4022
4023 if exclusivefiles:
4023 if exclusivefiles:
4024 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
4024 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
4025 if not self._inline:
4025 if not self._inline:
4026 d[b'exclusivefiles'].append((self.opener, self._datafile))
4026 d[b'exclusivefiles'].append((self.opener, self._datafile))
4027
4027
4028 if sharedfiles:
4028 if sharedfiles:
4029 d[b'sharedfiles'] = []
4029 d[b'sharedfiles'] = []
4030
4030
4031 if revisionscount:
4031 if revisionscount:
4032 d[b'revisionscount'] = len(self)
4032 d[b'revisionscount'] = len(self)
4033
4033
4034 if trackedsize:
4034 if trackedsize:
4035 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
4035 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
4036
4036
4037 if storedsize:
4037 if storedsize:
4038 d[b'storedsize'] = sum(
4038 d[b'storedsize'] = sum(
4039 self.opener.stat(path).st_size for path in self.files()
4039 self.opener.stat(path).st_size for path in self.files()
4040 )
4040 )
4041
4041
4042 return d
4042 return d
4043
4043
4044 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
4044 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
4045 if not self.feature_config.has_side_data:
4045 if not self.feature_config.has_side_data:
4046 return
4046 return
4047 # revlog formats with sidedata support does not support inline
4047 # revlog formats with sidedata support does not support inline
4048 assert not self._inline
4048 assert not self._inline
4049 if not helpers[1] and not helpers[2]:
4049 if not helpers[1] and not helpers[2]:
4050 # Nothing to generate or remove
4050 # Nothing to generate or remove
4051 return
4051 return
4052
4052
4053 new_entries = []
4053 new_entries = []
4054 # append the new sidedata
4054 # append the new sidedata
4055 with self._writing(transaction):
4055 with self._writing(transaction):
4056 ifh, dfh, sdfh = self._inner._writinghandles
4056 ifh, dfh, sdfh = self._inner._writinghandles
4057 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
4057 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
4058
4058
4059 current_offset = sdfh.tell()
4059 current_offset = sdfh.tell()
4060 for rev in range(startrev, endrev + 1):
4060 for rev in range(startrev, endrev + 1):
4061 entry = self.index[rev]
4061 entry = self.index[rev]
4062 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
4062 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
4063 store=self,
4063 store=self,
4064 sidedata_helpers=helpers,
4064 sidedata_helpers=helpers,
4065 sidedata={},
4065 sidedata={},
4066 rev=rev,
4066 rev=rev,
4067 )
4067 )
4068
4068
4069 serialized_sidedata = sidedatautil.serialize_sidedata(
4069 serialized_sidedata = sidedatautil.serialize_sidedata(
4070 new_sidedata
4070 new_sidedata
4071 )
4071 )
4072
4072
4073 sidedata_compression_mode = COMP_MODE_INLINE
4073 sidedata_compression_mode = COMP_MODE_INLINE
4074 if serialized_sidedata and self.feature_config.has_side_data:
4074 if serialized_sidedata and self.feature_config.has_side_data:
4075 sidedata_compression_mode = COMP_MODE_PLAIN
4075 sidedata_compression_mode = COMP_MODE_PLAIN
4076 h, comp_sidedata = self._inner.compress(serialized_sidedata)
4076 h, comp_sidedata = self._inner.compress(serialized_sidedata)
4077 if (
4077 if (
4078 h != b'u'
4078 h != b'u'
4079 and comp_sidedata[0] != b'\0'
4079 and comp_sidedata[0] != b'\0'
4080 and len(comp_sidedata) < len(serialized_sidedata)
4080 and len(comp_sidedata) < len(serialized_sidedata)
4081 ):
4081 ):
4082 assert not h
4082 assert not h
4083 if (
4083 if (
4084 comp_sidedata[0]
4084 comp_sidedata[0]
4085 == self._docket.default_compression_header
4085 == self._docket.default_compression_header
4086 ):
4086 ):
4087 sidedata_compression_mode = COMP_MODE_DEFAULT
4087 sidedata_compression_mode = COMP_MODE_DEFAULT
4088 serialized_sidedata = comp_sidedata
4088 serialized_sidedata = comp_sidedata
4089 else:
4089 else:
4090 sidedata_compression_mode = COMP_MODE_INLINE
4090 sidedata_compression_mode = COMP_MODE_INLINE
4091 serialized_sidedata = comp_sidedata
4091 serialized_sidedata = comp_sidedata
4092 if entry[8] != 0 or entry[9] != 0:
4092 if entry[8] != 0 or entry[9] != 0:
4093 # rewriting entries that already have sidedata is not
4093 # rewriting entries that already have sidedata is not
4094 # supported yet, because it introduces garbage data in the
4094 # supported yet, because it introduces garbage data in the
4095 # revlog.
4095 # revlog.
4096 msg = b"rewriting existing sidedata is not supported yet"
4096 msg = b"rewriting existing sidedata is not supported yet"
4097 raise error.Abort(msg)
4097 raise error.Abort(msg)
4098
4098
4099 # Apply (potential) flags to add and to remove after running
4099 # Apply (potential) flags to add and to remove after running
4100 # the sidedata helpers
4100 # the sidedata helpers
4101 new_offset_flags = entry[0] | flags[0] & ~flags[1]
4101 new_offset_flags = entry[0] | flags[0] & ~flags[1]
4102 entry_update = (
4102 entry_update = (
4103 current_offset,
4103 current_offset,
4104 len(serialized_sidedata),
4104 len(serialized_sidedata),
4105 new_offset_flags,
4105 new_offset_flags,
4106 sidedata_compression_mode,
4106 sidedata_compression_mode,
4107 )
4107 )
4108
4108
4109 # the sidedata computation might have move the file cursors around
4109 # the sidedata computation might have move the file cursors around
4110 sdfh.seek(current_offset, os.SEEK_SET)
4110 sdfh.seek(current_offset, os.SEEK_SET)
4111 sdfh.write(serialized_sidedata)
4111 sdfh.write(serialized_sidedata)
4112 new_entries.append(entry_update)
4112 new_entries.append(entry_update)
4113 current_offset += len(serialized_sidedata)
4113 current_offset += len(serialized_sidedata)
4114 self._docket.sidedata_end = sdfh.tell()
4114 self._docket.sidedata_end = sdfh.tell()
4115
4115
4116 # rewrite the new index entries
4116 # rewrite the new index entries
4117 ifh.seek(startrev * self.index.entry_size)
4117 ifh.seek(startrev * self.index.entry_size)
4118 for i, e in enumerate(new_entries):
4118 for i, e in enumerate(new_entries):
4119 rev = startrev + i
4119 rev = startrev + i
4120 self.index.replace_sidedata_info(rev, *e)
4120 self.index.replace_sidedata_info(rev, *e)
4121 packed = self.index.entry_binary(rev)
4121 packed = self.index.entry_binary(rev)
4122 if rev == 0 and self._docket is None:
4122 if rev == 0 and self._docket is None:
4123 header = self._format_flags | self._format_version
4123 header = self._format_flags | self._format_version
4124 header = self.index.pack_header(header)
4124 header = self.index.pack_header(header)
4125 packed = header + packed
4125 packed = header + packed
4126 ifh.write(packed)
4126 ifh.write(packed)
General Comments 0
You need to be logged in to leave comments. Login now