revlog: avoid exposing delayed index entry too widely in non-inline revlog...
marmoute, r52058:66417f55, stable
@@ -1,4243 +1,4249 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import io
import os
import struct
import weakref
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if hasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'


class _Config:
    def copy(self):
        return self.__class__(**self.__dict__)


@attr.s()
class FeatureConfig(_Config):
    """Hold configuration values about the available revlog features"""

    # the default compression engine
    compression_engine = attr.ib(default=b'zlib')
    # compression engines options
    compression_engine_options = attr.ib(default=attr.Factory(dict))

    # can we use censor on this revlog
    censorable = attr.ib(default=False)
    # does this revlog use the "side data" feature
    has_side_data = attr.ib(default=False)
    # might remove rank configuration once the computation has no impact
    compute_rank = attr.ib(default=False)
    # parent order is supposed to be semantically irrelevant, so we
    # normally resort parents to ensure that the first parent is non-null,
    # if there is a non-null parent at all.
    # filelog abuses the parent order as a flag to mark some instances of
    # meta-encoded files, so allow it to disable this behavior.
    canonical_parent_order = attr.ib(default=False)
    # can ellipsis commit be used
    enable_ellipsis = attr.ib(default=False)

    def copy(self):
        new = super().copy()
        new.compression_engine_options = self.compression_engine_options.copy()
        return new


@attr.s()
class DataConfig(_Config):
    """Hold configuration values about how the revlog data are read"""

    # should we try to open the "pending" version of the revlog
    try_pending = attr.ib(default=False)
    # should we try to open the "split" version of the revlog
    try_split = attr.ib(default=False)
    # When True, indexfile should be opened with checkambig=True at writing,
    # to avoid file stat ambiguity.
    check_ambig = attr.ib(default=False)

    # If true, use mmap instead of reading to deal with large index
    mmap_large_index = attr.ib(default=False)
    # how much data is considered "large" (threshold for using mmap)
    mmap_index_threshold = attr.ib(default=None)
    # How much data to read and cache into the raw revlog data cache.
    chunk_cache_size = attr.ib(default=65536)

    # The size of the uncompressed cache compared to the largest revision seen.
    uncompressed_cache_factor = attr.ib(default=None)

    # The number of chunks cached
    uncompressed_cache_count = attr.ib(default=None)

    # Allow sparse reading of the revlog data
    with_sparse_read = attr.ib(default=False)
    # minimal density of a sparse read chunk
    sr_density_threshold = attr.ib(default=0.50)
    # minimal size of data we skip when performing sparse read
    sr_min_gap_size = attr.ib(default=262144)

    # are deltas encoded against arbitrary bases?
    generaldelta = attr.ib(default=False)


@attr.s()
class DeltaConfig(_Config):
    """Hold configuration values about how new deltas are computed

    Some attributes are duplicated from DataConfig to help having each object
    self-contained.
    """

    # can deltas be encoded against arbitrary bases.
    general_delta = attr.ib(default=False)
    # Allow sparse writing of the revlog data
    sparse_revlog = attr.ib(default=False)
    # maximum length of a delta chain
    max_chain_len = attr.ib(default=None)
    # Maximum distance between delta chain base start and end
    max_deltachain_span = attr.ib(default=-1)
    # If `upper_bound_comp` is not None, this is the expected maximal gain from
    # compression for the data content.
    upper_bound_comp = attr.ib(default=None)
    # Should we try a delta against both parents
    delta_both_parents = attr.ib(default=True)
    # Test delta base candidate groups by chunks of this maximal size.
    candidate_group_chunk_size = attr.ib(default=0)
    # Should we display debug information about delta computation
    debug_delta = attr.ib(default=False)
    # trust incoming deltas by default
    lazy_delta = attr.ib(default=True)
    # trust the base of incoming deltas by default
    lazy_delta_base = attr.ib(default=False)


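# A minimal usage sketch for the config classes above (illustrative, not part
# of the original file): because these are plain attrs classes, `_Config.copy`
# can splat `self.__dict__` back into the constructor, giving an independent
# copy whose fields can diverge from the original.
#
#     base = DeltaConfig(general_delta=True, sparse_revlog=True)
#     experiment = base.copy()
#     experiment.max_chain_len = 1000    # tweak the copy...
#     assert base.max_chain_len is None  # ...the original is untouched
#
# FeatureConfig.copy additionally copies its options dict so copies do not
# share mutable state.

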
class _InnerRevlog:
    """An inner layer of the revlog object

    That layer exists to be able to delegate some operations to Rust; its
    boundaries are arbitrary and based on what we can delegate to Rust.
    """

    def __init__(
        self,
        opener,
        index,
        index_file,
        data_file,
        sidedata_file,
        inline,
        data_config,
        delta_config,
        feature_config,
        chunk_cache,
        default_compression_header,
    ):
        self.opener = opener
        self.index = index

        self.__index_file = index_file
        self.data_file = data_file
        self.sidedata_file = sidedata_file
        self.inline = inline
        self.data_config = data_config
        self.delta_config = delta_config
        self.feature_config = feature_config

        # used during diverted write.
        self._orig_index_file = None

        self._default_compression_header = default_compression_header

        # index

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self.index_file if self.inline else self.data_file),
            self.data_config.chunk_cache_size,
            chunk_cache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self.sidedata_file,
            self.data_config.chunk_cache_size,
        )

        # revlog header -> revlog compressor
        self._decompressors = {}
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None

        # cache some uncompressed chunks
        # rev -> uncompressed_chunk
        #
        # the max cost is dynamically updated to be proportional to the
        # size of revisions we actually encounter.
        self._uncompressed_chunk_cache = None
        if self.data_config.uncompressed_cache_factor is not None:
            self._uncompressed_chunk_cache = util.lrucachedict(
                self.data_config.uncompressed_cache_count,
                maxcost=65536,  # some arbitrary initial value
            )

        self._delay_buffer = None

    @property
    def index_file(self):
        return self.__index_file

    @index_file.setter
    def index_file(self, new_index_file):
        self.__index_file = new_index_file
        if self.inline:
            self._segmentfile.filename = new_index_file

    def __len__(self):
        return len(self.index)

    def clear_cache(self):
        assert not self.is_delaying
        self._revisioncache = None
        if self._uncompressed_chunk_cache is not None:
            self._uncompressed_chunk_cache.clear()
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

    @property
    def canonical_index_file(self):
        if self._orig_index_file is not None:
            return self._orig_index_file
        return self.index_file

    @property
    def is_delaying(self):
        """is the revlog currently delaying the visibility of written data?

        The delaying mechanism can be either in-memory or written on disk in a
        side-file."""
        return (self._delay_buffer is not None) or (
            self._orig_index_file is not None
        )

    # Derived from index values.

    def start(self, rev):
        """the offset of the data chunk for this revision"""
        return int(self.index[rev][0] >> 16)

    def length(self, rev):
        """the length of the data chunk for this revision"""
        return self.index[rev][1]

    def end(self, rev):
        """the end of the data chunk for this revision"""
        return self.start(rev) + self.length(rev)
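
    # A minimal sketch of the packing assumed by start() above (not part of
    # the original file): index[rev][0] holds the absolute data offset in its
    # high bits and the revision flags in the low 16 bits, hence the `>> 16`:
    #
    #     offset_flags = (data_offset << 16) | flags
    #     assert offset_flags >> 16 == data_offset
    #     assert offset_flags & 0xFFFF == flags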

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self.delta_config.general_delta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self.delta_config.sparse_revlog:
            return self.deltaparent(rev) == nullrev
        elif hasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        while self.length(p1) == 0:
            b = self.deltaparent(p1)
            if b == p1:
                break
            p1 = b
        p2 = entry[6]
        while self.length(p2) == 0:
            b = self.deltaparent(p2)
            if b == p2:
                break
            p2 = b
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

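    # An illustration of the sparse-revlog rule above (a sketch, not part of
    # the original file): a revision is a snapshot when its delta base is not
    # one of its (non-empty) parents and that base is itself a snapshot; a
    # base of nullrev means a full snapshot.
    #
    #     rev   base      parents   issnapshot?
    #     0     nullrev   ()        True   (full snapshot)
    #     1     0         (0,)      False  (delta against its parent)
    #     2     0         (1,)      True   (base 0 is a snapshot, not a parent)
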
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        generaldelta = self.delta_config.general_delta
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

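    # A small worked example of the pure-python fallback above (a sketch,
    # assuming a toy index where entry[3] is the delta base, as in the real
    # index tuples):
    #
    #     # rev 0: base 0 (chain root), rev 1: base 0, rev 2: base 1
    #     # generaldelta walk: 2 -> e[3] == 1 -> e[3] == 0 == itself, stop
    #     # _deltachain(2) == ([0, 1, 2], False)
    #     # with a stop revision: _deltachain(2, stoprev=1) == ([2], True)
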
    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self.feature_config.compression_engine]
        return engine.revlogcompressor(
            self.feature_config.compression_engine_options
        )

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._default_compression_header is None:
            return None
        t = self._default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(
                    self.feature_config.compression_engine_options
                )
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

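    # A short sketch of the header convention implemented by compress() and
    # decompress() (illustrative, not part of the original file; `big_text`
    # and `binary` are hypothetical inputs):
    #
    #     compress(b'')          -> (b'', b'')      # empty: stored as-is
    #     compress(big_text)     -> (b'', b'x...')  # engine won; its own header
    #                                               # (b'x' for zlib) leads the blob
    #     compress(binary)       -> (b'u', binary)  # no gain and no leading NUL:
    #                                               # b'u' marks stored-uncompressed
    #     compress(b'\0' + data) -> (b'', ...)      # a leading NUL is unambiguous,
    #                                               # so no marker byte is needed
    #
    # decompress() dispatches on that first byte: b'x' -> zlib, b'\0' -> raw,
    # b'u' -> raw minus the marker, anything else -> compression engine lookup.
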
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        if len(self.index) == 0:
            yield  # nothing to be read
        else:
            with self._segmentfile.reading():
                with self._segmentfile_sidedata.reading():
                    yield

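    # Typical use (a sketch; `inner` stands for an _InnerRevlog instance):
    # bracket a batch of reads so the underlying descriptors are opened once
    # rather than once per chunk.
    #
    #     with inner.reading():
    #         texts = [inner._chunk(rev) for rev in revs]
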
    @property
    def is_writing(self):
        """True if a writing context is open"""
        return self._writinghandles is not None

    @property
    def is_open(self):
        """True if any file handle is being held

        Used for assert and debug in the python code"""
        return self._segmentfile.is_open or self._segmentfile_sidedata.is_open

    @contextlib.contextmanager
    def writing(self, transaction, data_end=None, sidedata_end=None):
        """Open the revlog files for writing

        Adding content to a revlog should be done within such a context.
        """
        if self.is_writing:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self.index)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self.inline:
                    try:
                        dfh = self.opener(self.data_file, mode=b"r+")
                        if data_end is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self.opener(self.data_file, mode=b"w+")
                        transaction.add(self.data_file, dsize)
                if self.sidedata_file is not None:
                    assert sidedata_end is not None
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self.sidedata_file, mode=b"r+")
                        dfh.seek(sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self.sidedata_file, mode=b"w+")
                        transaction.add(self.sidedata_file, sidedata_end)

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self.inline:
                    transaction.add(self.index_file, dsize + isize)
                else:
                    transaction.add(self.index_file, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self.inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # close the index file last, to avoid exposing references to
                # potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

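    # Typical use (a sketch; `inner` is an _InnerRevlog, `tr` an open
    # transaction object): re-entrant calls are cheap because an already-open
    # writing context simply yields.
    #
    #     with inner.writing(tr):
    #         with inner.writing(tr):  # no-op re-entry, handles are reused
    #             ...  # append revision data here
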
    def __index_write_fp(self, index_end=None):
        """internal method to open the index file for writing

        You should not use this directly and use `_writing` instead
        """
        try:
            if self._delay_buffer is None:
                f = self.opener(
                    self.index_file,
                    mode=b"r+",
                    checkambig=self.data_config.check_ambig,
                )
            else:
                # check_ambig affects the way we open the file for writing;
                # however, here we do not actually open a file for writing, as
                # writes will be appended to a delay_buffer. So check_ambig is
                # not meaningful and unneeded here.
                f = randomaccessfile.appender(
                    self.opener, self.index_file, b"r+", self._delay_buffer
                )
            if index_end is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(index_end, os.SEEK_SET)
            return f
        except FileNotFoundError:
            if self._delay_buffer is None:
                return self.opener(
                    self.index_file,
                    mode=b"w+",
                    checkambig=self.data_config.check_ambig,
                )
            else:
                return randomaccessfile.appender(
                    self.opener, self.index_file, b"w+", self._delay_buffer
                )

    def __index_new_fp(self):
        """internal method to create a new index file for writing

        You should not use this unless you are upgrading from inline revlog
        """
        return self.opener(
            self.index_file,
            mode=b"w",
            checkambig=self.data_config.check_ambig,
            atomictemp=True,
        )

    def split_inline(self, tr, header, new_index_file_path=None):
        """split the data of an inline revlog into an index and a data file"""
        assert self._delay_buffer is None
        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with sidedata writing handle as it is only
            # relevant with revlog-v2 which is never inline, not reaching
            # this code

        new_dfh = self.opener(self.data_file, mode=b"w+")
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self.reading():
                for r in range(len(self.index)):
                    new_dfh.write(self.get_segment_for_revs(r, r)[1])
                new_dfh.flush()

            if new_index_file_path is not None:
                self.index_file = new_index_file_path
            with self.__index_new_fp() as fp:
                self.inline = False
                for i in range(len(self.index)):
                    e = self.index.entry_binary(i)
                    if i == 0:
                        packed_header = self.index.pack_header(header)
                        e = packed_header + e
                    fp.write(e)

                # If we don't use side-write, the temp file replaces the real
                # index when we exit the context manager

            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self.data_file,
                self.data_config.chunk_cache_size,
            )

            if existing_handles:
                # switched from inline to conventional, reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with sidedata writing handle as it is only
                # relevant with revlog-v2 which is never inline, not reaching
                # this code
        finally:
            if new_dfh is not None:
                new_dfh.close()
        return self.index_file

    def get_segment_for_revs(self, startrev, endrev):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.

        API: we should consider making this a private part of the InnerRevlog
        at some point.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self.inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length)

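    # A worked example of the inline adjustment above (a sketch): in an
    # inline revlog each revision's data follows its 64-byte index entry
    # (entry_size for revlog-v1), so flat data offsets are shifted by the
    # index entries preceding them.
    #
    #     # entry_size = 64, start(0) = 0, length(0) = 10
    #     # physical start of rev 0 = 0  + (0 + 1) * 64 = 64
    #     # physical end of rev 0   = 10 + (0 + 1) * 64 = 74
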
    def _chunk(self, rev):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        if self._uncompressed_chunk_cache is not None:
            uncomp = self._uncompressed_chunk_cache.get(rev)
            if uncomp is not None:
                return uncomp

        compression_mode = self.index[rev][10]
        data = self.get_segment_for_revs(rev, rev)[1]
        if compression_mode == COMP_MODE_PLAIN:
            uncomp = data
        elif compression_mode == COMP_MODE_DEFAULT:
            uncomp = self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            uncomp = self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)
        if self._uncompressed_chunk_cache is not None:
            self._uncompressed_chunk_cache.insert(rev, uncomp, cost=len(uncomp))
        return uncomp

    def _chunks(self, revs, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self.inline
        iosize = self.index.entry_size
        buffer = util.buffer

        fetched_revs = []
        fadd = fetched_revs.append

        chunks = []
        ladd = chunks.append

        if self._uncompressed_chunk_cache is None:
            fetched_revs = revs
        else:
            for rev in revs:
                cached_value = self._uncompressed_chunk_cache.get(rev)
                if cached_value is None:
                    fadd(rev)
                else:
                    ladd((rev, cached_value))

        if not fetched_revs:
            slicedchunks = ()
        elif not self.data_config.with_sparse_read:
            slicedchunks = (fetched_revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self,
                fetched_revs,
                targetsize=targetsize,
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self.get_segment_for_revs(firstrev, lastrev)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                for rev in revschunk:
                    ladd((rev, self._chunk(rev)))
                # the per-revision fallback already added every chunk of
                # this slice; skip the segment-based path below
                continue

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    c = c
                elif comp_mode == COMP_MODE_INLINE:
                    c = decomp(c)
                elif comp_mode == COMP_MODE_DEFAULT:
                    c = def_decomp(c)
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)
                ladd((rev, c))
                if self._uncompressed_chunk_cache is not None:
                    self._uncompressed_chunk_cache.insert(rev, c, len(c))

        chunks.sort()
        return [x[1] for x in chunks]

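    # Editorial sketch (not part of the original source) of how the method
    # above is typically driven. ``inner`` stands for a hypothetical,
    # already-open inner revlog object; the revision numbers are made up.
    #
    #     chain, _stopped = inner._deltachain(7)   # e.g. [2, 5, 7]
    #     bins = inner._chunks(chain)
    #     # bins[0] is the chain base, bins[1:] are the binary deltas that
    #     # mdiff.patches() applies in order to rebuild revision 7.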
    def raw_text(self, node, rev):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            cachedrev = self._revisioncache[1]

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._inner._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        if self._uncompressed_chunk_cache is not None:
            # dynamically grow the uncompressed_chunk_cache to fit the
            # largest revision we have seen in this revlog.
            factor = self.data_config.uncompressed_cache_factor
            candidate_size = rawsize * factor
            if candidate_size > self._uncompressed_chunk_cache.maxcost:
                self._uncompressed_chunk_cache.maxcost = candidate_size

        bins = self._chunks(chain, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

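    # The patching done by ``raw_text`` is ordinary delta application. A
    # self-contained illustration with mdiff (hypothetical texts, not tied
    # to any real revlog):
    #
    #     from mercurial import mdiff
    #     base = b'line1\nline2\n'
    #     new = b'line1\nline2\nline3\n'
    #     delta = mdiff.textdiff(base, new)  # a binary delta, as stored
    #     assert mdiff.patches(base, [delta]) == new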
    def sidedata(self, rev, sidedata_end):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self.inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if sidedata_end < sidedata_offset + sidedata_size:
            filename = self.sidedata_file
            end = sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def write_entry(
        self,
        transaction,
        entry,
        data,
        link,
        offset,
        sidedata,
        sidedata_offset,
        index_end,
        data_end,
        sidedata_end,
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some
        # versions of Solaris have been observed not to append at the end
        # of the file if it was seeked to a position before the end. See
        # issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we
        # need to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if index_end is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(index_end, os.SEEK_SET)
        if dfh:
            if data_end is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(sidedata_end, os.SEEK_SET)

        curr = len(self.index) - 1
        if not self.inline:
            transaction.add(self.data_file, offset)
            if self.sidedata_file:
                transaction.add(self.sidedata_file, sidedata_offset)
            transaction.add(self.canonical_index_file, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            if self._delay_buffer is None:
                ifh.write(entry)
            else:
                self._delay_buffer.append(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self.canonical_index_file, offset)
            assert not sidedata
            if self._delay_buffer is None:
                ifh.write(entry)
                ifh.write(data[0])
                ifh.write(data[1])
            else:
                self._delay_buffer.append(entry)
                self._delay_buffer.append(data[0])
                self._delay_buffer.append(data[1])
        return (
            ifh.tell(),
            dfh.tell() if dfh else None,
            sdfh.tell() if sdfh else None,
        )

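    # The three offsets returned by ``write_entry`` are the new end
    # positions of the index, data, and sidedata files. A caller sketch
    # (assuming a docket-based revlog; the names are illustrative, not the
    # exact call site):
    #
    #     index_end, data_end, sidedata_end = inner.write_entry(...)
    #     docket.index_end = index_end       # the next append seeks here
    #     docket.data_end = data_end
    #     docket.sidedata_end = sidedata_end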
    def _divert_index(self):
        return self.index_file + b'.a'

    def delay(self):
        assert not self.is_open
        if self._delay_buffer is not None or self._orig_index_file is not None:
            # delay or divert already in place
            return None
        elif len(self.index) == 0:
            self._orig_index_file = self.index_file
            self.index_file = self._divert_index()
            self._segmentfile.filename = self.index_file
            assert self._orig_index_file is not None
            assert self.index_file is not None
            if self.opener.exists(self.index_file):
                self.opener.unlink(self.index_file)
            return self.index_file
        else:
-            self._segmentfile._delay_buffer = self._delay_buffer = []
+            self._delay_buffer = []
+            if self.inline:
+                self._segmentfile._delay_buffer = self._delay_buffer
            return None

    def write_pending(self):
        assert not self.is_open
        if self._orig_index_file is not None:
            return None, True
        any_pending = False
        pending_index_file = self._divert_index()
        if self.opener.exists(pending_index_file):
            self.opener.unlink(pending_index_file)
        util.copyfile(
            self.opener.join(self.index_file),
            self.opener.join(pending_index_file),
        )
        if self._delay_buffer:
            with self.opener(pending_index_file, b'r+') as ifh:
                ifh.seek(0, os.SEEK_END)
                ifh.write(b"".join(self._delay_buffer))
            any_pending = True
-        self._segmentfile._delay_buffer = self._delay_buffer = None
+        self._delay_buffer = None
+        if self.inline:
+            self._segmentfile._delay_buffer = self._delay_buffer
+        else:
+            assert self._segmentfile._delay_buffer is None
        self._orig_index_file = self.index_file
        self.index_file = pending_index_file
        self._segmentfile.filename = self.index_file
        return self.index_file, any_pending

    def finalize_pending(self):
        assert not self.is_open

        delay = self._delay_buffer is not None
        divert = self._orig_index_file is not None

        if delay and divert:
            assert False, "unreachable"
        elif delay:
            if self._delay_buffer:
                with self.opener(self.index_file, b'r+') as ifh:
                    ifh.seek(0, os.SEEK_END)
                    ifh.write(b"".join(self._delay_buffer))
            self._segmentfile._delay_buffer = self._delay_buffer = None
        elif divert:
            if self.opener.exists(self.index_file):
                self.opener.rename(
                    self.index_file,
                    self._orig_index_file,
                    checkambig=True,
                )
            self.index_file = self._orig_index_file
            self._orig_index_file = None
            self._segmentfile.filename = self.index_file
        else:
            msg = b"neither delay nor divert found on this revlog"
            raise error.ProgrammingError(msg)
        return self.canonical_index_file


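# The delay/divert machinery above has two modes, picked in ``delay()``:
# an empty revlog *diverts* new entries to a temporary ``.i.a`` index
# file, while a non-empty one *delays* them in an in-memory buffer that,
# per this changeset, is hooked into the segment file only for inline
# revlogs. A hypothetical driver would sequence it like this (editorial
# sketch, assuming an inner revlog object named ``inner``):
#
#     inner.delay()                        # start buffering or diverting
#     ...                                  # revisions are added meanwhile
#     f, pending = inner.write_pending()   # expose entries to hooks
#     inner.finalize_pending()             # fold them into the real index

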
class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

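    # Worked example of the bound stated above (editorial, with made-up
    # numbers): for a 100-byte full text, deltas keep accumulating only
    # while base-plus-deltas stays under roughly 2 * 100 bytes; past that,
    # a fresh full text is stored. Rebuilding any revision therefore reads
    # O(text size) bytes, independent of the number of revisions.
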
    _flagserrorclass = error.RevlogError

    @staticmethod
    def is_inline_index(header_bytes):
        """Determine if a revlog is inline from the initial bytes of the index"""
        if len(header_bytes) == 0:
            return True

        header = INDEX_HEADER.unpack(header_bytes)[0]

        _format_flags = header & ~0xFFFF
        _format_version = header & 0xFFFF

        features = FEATURES_BY_VERSION[_format_version]
        return features[b'inline'](_format_flags)

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        try_split=False,
        canonical_parent_order=True,
        data_config=None,
        delta_config=None,
        feature_config=None,
        may_inline=True,  # may inline new revlog
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must be reliably set by normal
        code, but that test, debug, or performance-measurement code might
        not set it to an accurate value.
        """

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self._try_split = try_split
        self._may_inline = may_inline
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        if feature_config is not None:
            self.feature_config = feature_config.copy()
        elif b'feature-config' in self.opener.options:
            self.feature_config = self.opener.options[b'feature-config'].copy()
        else:
            self.feature_config = FeatureConfig()
        self.feature_config.censorable = censorable
        self.feature_config.canonical_parent_order = canonical_parent_order
        if data_config is not None:
            self.data_config = data_config.copy()
        elif b'data-config' in self.opener.options:
            self.data_config = self.opener.options[b'data-config'].copy()
        else:
            self.data_config = DataConfig()
        self.data_config.check_ambig = checkambig
        self.data_config.mmap_large_index = mmaplargeindex
        if delta_config is not None:
            self.delta_config = delta_config.copy()
        elif b'delta-config' in self.opener.options:
            self.delta_config = self.opener.options[b'delta-config'].copy()
        else:
            self.delta_config = DeltaConfig()
        self.delta_config.upper_bound_comp = upperboundcomp

        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)

        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}

        # other optional features

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)
        # prevent nesting of addgroup
        self._adding_group = None

        chunk_cache = self._loadindex()
        self._load_inner(chunk_cache)
        self._concurrencychecker = concurrencychecker

    @property
    def _generaldelta(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
        )
        return self.delta_config.general_delta

    @property
    def _checkambig(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
        )
        return self.data_config.check_ambig

    @property
    def _mmaplargeindex(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
        )
        return self.data_config.mmap_large_index

    @property
    def _censorable(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
        )
        return self.feature_config.censorable

    @property
    def _chunkcachesize(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
        )
        return self.data_config.chunk_cache_size

    @property
    def _maxchainlen(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
        )
        return self.delta_config.max_chain_len

    @property
    def _deltabothparents(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
        )
        return self.delta_config.delta_both_parents

    @property
    def _candidate_group_chunk_size(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.candidate_group_chunk_size",
            b"6.6",
            stacklevel=2,
        )
        return self.delta_config.candidate_group_chunk_size

    @property
    def _debug_delta(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
        )
        return self.delta_config.debug_delta

    @property
    def _compengine(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.compression_engine",
            b"6.6",
            stacklevel=2,
        )
        return self.feature_config.compression_engine

    @property
    def upperboundcomp(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.upper_bound_comp",
            b"6.6",
            stacklevel=2,
        )
        return self.delta_config.upper_bound_comp

    @property
    def _compengineopts(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.compression_engine_options",
            b"6.6",
            stacklevel=2,
        )
        return self.feature_config.compression_engine_options

    @property
    def _maxdeltachainspan(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
        )
        return self.delta_config.max_deltachain_span

    @property
    def _withsparseread(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
        )
        return self.data_config.with_sparse_read

    @property
    def _sparserevlog(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
        )
        return self.delta_config.sparse_revlog

    @property
    def hassidedata(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
        )
        return self.feature_config.has_side_data

    @property
    def _srdensitythreshold(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.sr_density_threshold",
            b"6.6",
            stacklevel=2,
        )
        return self.data_config.sr_density_threshold

    @property
    def _srmingapsize(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
        )
        return self.data_config.sr_min_gap_size

    @property
    def _compute_rank(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
        )
        return self.feature_config.compute_rank

    @property
    def canonical_parent_order(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.canonical_parent_order",
            b"6.6",
            stacklevel=2,
        )
        return self.feature_config.canonical_parent_order

    @property
    def _lazydelta(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
        )
        return self.delta_config.lazy_delta

    @property
    def _lazydeltabase(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
        )
        return self.delta_config.lazy_delta_base

    def _init_opts(self):
        """process options (from above/config) to set up the associated
        default revlog mode

        These values might be affected when actually reading on-disk
        information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
            compute_rank = opts.get(b'changelogv2.compute-rank', True)
            self.feature_config.compute_rank = compute_rank
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1
            if self._may_inline:
                new_header |= FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        mmapindexthreshold = None
        if self.data_config.mmap_large_index:
            mmapindexthreshold = self.data_config.mmap_index_threshold
        if self.feature_config.enable_ellipsis:
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        chunk_cache_size = self.data_config.chunk_cache_size
        if chunk_cache_size <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % chunk_cache_size
            )
        elif chunk_cache_size & (chunk_cache_size - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % chunk_cache_size
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

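    # The ``chunk_cache_size & (chunk_cache_size - 1)`` check above is the
    # classic power-of-two test: a power of two has a single bit set, so
    # subtracting one flips every lower bit and the AND comes out zero.
    #
    #     assert 65536 & (65536 - 1) == 0  # accepted cache size
    #     assert 65537 & (65537 - 1) != 0  # rejected: not a power of 2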
    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing, return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid a potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''

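    # Usage sketch for ``_get_data`` (the 1 MiB threshold is illustrative,
    # not a real default): files at or above the threshold are mmapped,
    # smaller ones are read outright, and a missing file yields b''.
    #
    #     data = rl._get_data(rl._indexfile, 1024 * 1024)
    #     if not data:
    #         ...  # treat as an empty revlog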
    def get_streams(self, max_linkrev, force_inline=False):
        """return a list of streams that represent this revlog

        This is used by stream-clone to do byte-for-byte copies of a
        repository.

        This streams data for all revisions that refer to a changelog
        revision up to `max_linkrev`.

        If `force_inline` is set, it enforces that the stream will represent
        an inline revlog.

        It returns a list of three-tuples:

        [
            (filename, bytes_stream, stream_size),
            …
        ]
        """
        n = len(self)
        index = self.index
        while n > 0:
            linkrev = index[n - 1][4]
            if linkrev < max_linkrev:
                break
            # note: this loop will rarely go through multiple iterations,
            # since it only traverses commits created during the current
            # streaming pull operation.
            #
            # If this becomes a problem, using a binary search should cap
            # the runtime of this.
            n = n - 1
        if n == 0:
            # no data to send
            return []
        index_size = n * index.entry_size
        data_size = self.end(n - 1)

        # XXX we might have been split (or stripped) since the object was
        # initialized. We need to close this race too, e.g. by pre-opening
        # the files we feed to the revlog and never closing them before we
        # are done streaming.

        if self._inline:

            def get_stream():
                with self.opener(self._indexfile, mode=b"r") as fp:
                    yield None
                    size = index_size + data_size
                    if size <= 65536:
                        yield fp.read(size)
                    else:
                        yield from util.filechunkiter(fp, limit=size)

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        elif force_inline:

            def get_stream():
                with self.reading():
                    yield None

                    for rev in range(n):
                        idx = self.index.entry_binary(rev)
                        if rev == 0 and self._docket is None:
                            # re-inject the inline flag
                            header = self._format_flags
                            header |= self._format_version
                            header |= FLAG_INLINE_DATA
                            header = self.index.pack_header(header)
                            idx = header + idx
                        yield idx
                        yield self._inner.get_segment_for_revs(rev, rev)[1]

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        else:

            def get_index_stream():
                with self.opener(self._indexfile, mode=b"r") as fp:
                    yield None
                    if index_size <= 65536:
                        yield fp.read(index_size)
                    else:
                        yield from util.filechunkiter(fp, limit=index_size)

            def get_data_stream():
                with self._datafp() as fp:
                    yield None
                    if data_size <= 65536:
                        yield fp.read(data_size)
                    else:
                        yield from util.filechunkiter(fp, limit=data_size)

            index_stream = get_index_stream()
            next(index_stream)
            data_stream = get_data_stream()
            next(data_stream)
            return [
                (self._datafile, data_stream, data_size),
                (self._indexfile, index_stream, index_size),
            ]

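    # Consuming the result of ``get_streams`` (editorial sketch): a
    # stream-clone sender iterates the triples and forwards the raw bytes;
    # ``emit_header`` and ``emit_bytes`` are hypothetical helpers.
    #
    #     for name, stream, size in rl.get_streams(max_linkrev):
    #         emit_header(name, size)
    #         for block in stream:
    #             emit_bytes(block)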
    def _loadindex(self, docket=None):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        elif self._try_split and self.opener.exists(self._split_index_file):
            entry_point = self._split_index_file
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self.delta_config.general_delta = features[b'generaldelta'](
                self._format_flags
            )
            self.feature_config.has_side_data = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self.delta_config.general_delta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self.delta_config.general_delta:
            self.delta_config.sparse_revlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
1895 and hasattr(index, 'update_nodemap_data')
1901 and hasattr(index, 'update_nodemap_data')
1896 )
1902 )
1897 if use_nodemap:
1903 if use_nodemap:
1898 nodemap_data = nodemaputil.persisted_data(self)
1904 nodemap_data = nodemaputil.persisted_data(self)
1899 if nodemap_data is not None:
1905 if nodemap_data is not None:
1900 docket = nodemap_data[0]
1906 docket = nodemap_data[0]
1901 if (
1907 if (
1902 len(d[0]) > docket.tip_rev
1908 len(d[0]) > docket.tip_rev
1903 and d[0][docket.tip_rev][7] == docket.tip_node
1909 and d[0][docket.tip_rev][7] == docket.tip_node
1904 ):
1910 ):
1905 # no changelog tampering
1911 # no changelog tampering
1906 self._nodemap_docket = docket
1912 self._nodemap_docket = docket
1907 index.update_nodemap_data(*nodemap_data)
1913 index.update_nodemap_data(*nodemap_data)
1908 except (ValueError, IndexError):
1914 except (ValueError, IndexError):
1909 raise error.RevlogError(
1915 raise error.RevlogError(
1910 _(b"index %s is corrupted") % self.display_id
1916 _(b"index %s is corrupted") % self.display_id
1911 )
1917 )
1912 self.index = index
1918 self.index = index
1913 # revnum -> (chain-length, sum-delta-length)
1919 # revnum -> (chain-length, sum-delta-length)
1914 self._chaininfocache = util.lrucachedict(500)
1920 self._chaininfocache = util.lrucachedict(500)
1915
1921
1916 return chunkcache
1922 return chunkcache
1917
1923
1918 def _load_inner(self, chunk_cache):
1924 def _load_inner(self, chunk_cache):
1919 if self._docket is None:
1925 if self._docket is None:
1920 default_compression_header = None
1926 default_compression_header = None
1921 else:
1927 else:
1922 default_compression_header = self._docket.default_compression_header
1928 default_compression_header = self._docket.default_compression_header
1923
1929
1924 self._inner = _InnerRevlog(
1930 self._inner = _InnerRevlog(
1925 opener=self.opener,
1931 opener=self.opener,
1926 index=self.index,
1932 index=self.index,
1927 index_file=self._indexfile,
1933 index_file=self._indexfile,
1928 data_file=self._datafile,
1934 data_file=self._datafile,
1929 sidedata_file=self._sidedatafile,
1935 sidedata_file=self._sidedatafile,
1930 inline=self._inline,
1936 inline=self._inline,
1931 data_config=self.data_config,
1937 data_config=self.data_config,
1932 delta_config=self.delta_config,
1938 delta_config=self.delta_config,
1933 feature_config=self.feature_config,
1939 feature_config=self.feature_config,
1934 chunk_cache=chunk_cache,
1940 chunk_cache=chunk_cache,
1935 default_compression_header=default_compression_header,
1941 default_compression_header=default_compression_header,
1936 )
1942 )
1937
1943
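    # Structure note (an observation, not a normative contract): this class
    # keeps the graph/index level logic, while _InnerRevlog (instantiated
    # above) owns the file-level concerns: the index/data/sidedata files,
    # their caches and compression. Methods below delegate low-level work
    # inward; _deltachain() for instance is a thin wrapper around
    # self._inner._deltachain().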
    def get_revlog(self):
        """simple function to mirror the API of other not-really-revlog classes"""
        return self

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        if self.revlog_kind == KIND_FILELOG:
            # Reference the file without the "data/" prefix, so it is familiar
            # to the user.
            return self.target[1]
        else:
            return self.radix

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def _candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        """update the on-disk cache

        If a transaction is passed, the update may be delayed to transaction
        commit."""
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        """Clear in-memory caches"""
        self._chainbasecache.clear()
        self._inner.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The Python code is responsible for validating the docket, so we
        # end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and hasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        """return the revision number associated with a <nodeid>"""
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
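    #
    # A minimal sketch of that packing, derived from start() and flags()
    # below:
    #
    #   entry0 = (offset << 16) | flags   # what is stored
    #   offset = entry0 >> 16             # unpacked by start()
    #   flags = entry0 & 0xFFFF           # unpacked by flags()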
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is an annoying dance, because entries without sidedata
        # currently use 0 as their offset (instead of previous-offset +
        # previous-size).
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

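    # Worked example for the backward scan above, with made-up numbers: if
    # rev itself has no recorded cut-off and the nearest earlier revision
    # with sidedata stored 40 bytes at offset 100 (e[8] == 100, e[9] == 40),
    # the cut-off returned is 140; with no sidedata anywhere, it is 0.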
    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.feature_config.has_side_data:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank

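    # Example: in a linear changelog 0 <- 1 <- 2 the persisted rank of rev 2
    # is 3 (revs 0, 1 and 2). For any non-CHANGELOGV2 revlog fast_rank()
    # returns None and callers have to compute the value themselves.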
    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        if self.feature_config.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

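    # Illustration of the canonical-parent-order swap above: if the index
    # stores the parents of a revision as (nullrev, r), parentrevs() returns
    # (r, nullrev), so that a real parent is always reported first whenever
    # the feature is enabled.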
    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if self.feature_config.canonical_parent_order and d[5] == nullrev:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self.delta_config.general_delta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

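    # Sketch of what _chaininfo() accumulates, with made-up sizes: for a
    # delta chain base <- d1 <- d2 whose deltas compress to 10 and 20 bytes
    # on top of a 100 byte base, _chaininfo(rev(d2)) returns (2, 130): two
    # delta hops, and 130 bytes to read and apply in total.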
    def _deltachain(self, rev, stoprev=None):
        return self._inner._deltachain(rev, stoprev=stoprev)

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

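    # Usage sketch with hypothetical nodes: in a linear history 0 <- 1 <- 2,
    # findcommonmissing([node(1)], [node(2)]) returns a lazy set covering
    # {nullrev, 0, 1} and the missing list [node(2)], i.e. ::common and
    # (::heads) - (::common) from the docstring above.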
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

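    # Usage sketch with hypothetical nodes: in a linear history 0 <- 1 <- 2,
    # nodesbetween([node(1)], [node(2)]) returns
    # ([node(1), node(2)], [node(1)], [node(2)]): the topological path plus
    # the subsets of roots and heads that are actually reachable.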
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
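        # note: the list is one slot longer than the revlog on purpose. A
        # parent equal to nullrev (-1) makes the assignment below hit the
        # extra trailing slot through negative indexing instead of
        # clobbering the head bit of a real revision.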
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

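    # Why the reachableroots() call above answers the question: with
    # roots=[a] and heads=[b] (and minroot=a as a lower bound), the result
    # is non-empty exactly when walking b's ancestry reaches a. The a > b
    # early return is sound because a revision number is always greater
    # than those of its ancestors.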
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass

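    # _match() tries interpretations from cheapest to most specific: integer
    # revision, binary node (nodeconstants.nodelen bytes), stringified and
    # possibly negative revision number, then full hex node. Anything
    # shorter falls through to the prefix logic in _partialmatch() below
    # (see lookup()).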
    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if id[-1] not in hexdigits:
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

2750 def lookup(self, id):
2756 def lookup(self, id):
2751 """locate a node based on:
2757 """locate a node based on:
2752 - revision number or str(revision number)
2758 - revision number or str(revision number)
2753 - nodeid or subset of hex nodeid
2759 - nodeid or subset of hex nodeid
2754 """
2760 """
2755 n = self._match(id)
2761 n = self._match(id)
2756 if n is not None:
2762 if n is not None:
2757 return n
2763 return n
2758 n = self._partialmatch(id)
2764 n = self._partialmatch(id)
2759 if n:
2765 if n:
2760 return n
2766 return n
2761
2767
2762 raise error.LookupError(id, self.display_id, _(b'no match found'))
2768 raise error.LookupError(id, self.display_id, _(b'no match found'))
2763
2769
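    # A quick, hypothetical sketch of what `lookup` accepts (`rl` stands for
    # any revlog instance; these examples are not part of the original
    # source):
    #
    #   rl.lookup(0)           # revision number       -> node of rev 0
    #   rl.lookup(b'-1')       # str(rev), negative revs count from the tip
    #   rl.lookup(node)        # 20-byte binary nodeid -> returned as-is
    #   rl.lookup(b'1f0e5c')   # unambiguous hex prefix -> full binary node
    #
    # Ambiguous prefixes raise AmbiguousPrefixLookupError; anything that
    # matches nothing raises LookupError.
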
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

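    # `shortest` has two paths: a fast one that asks the compiled index
    # (`self.index.shortest`, C/Rust) for the minimal unambiguous length,
    # and a pure-Python fallback (used when the index lacks `shortest`, or
    # when revisions are filtered) that probes increasing prefix lengths
    # with `_partialmatch` until one stops being ambiguous.
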
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

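    # Note that `cmp` never reads the stored revision back: hashing the
    # candidate text with the stored parents and comparing against the node
    # is sufficient, since the nodeid is itself a hash of (text, p1, p2)
    # (see the `hash` method below).
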
    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self.delta_config.general_delta:
            return base
        else:
            return rev - 1

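    # In the index, a delta base equal to the revision itself marks a full
    # snapshot (hence the nullrev answer above). With general delta the
    # stored base is the actual delta parent; in the legacy layout deltas
    # implicitly chain against the previous revision, so the answer is
    # simply `rev - 1`.
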
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        ret = self._inner.issnapshot(rev)
        self.issnapshot = self._inner.issnapshot
        return ret

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._inner._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._inner._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def revision(self, nodeorrev):
        """return an uncompressed revision of a given node or revision
        number.
        """
        return self._revisiondata(nodeorrev)

    def sidedata(self, nodeorrev):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more
        advanced mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _rawtext(self, node, rev):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """
        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._inner._revisioncache:
            if self._inner._revisioncache[0] == node:
                return (rev, self._inner._revisioncache[2], True)

        if rev is None:
            rev = self.rev(node)

        return self._inner.raw_text(node, rev)

    def _revisiondata(self, nodeorrev, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev)

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._inner._revisioncache = (node, rev, rawtext)

        return text

    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        sidedata_end = None
        if self._docket is not None:
            sidedata_end = self._docket.sidedata_end
        return self._inner.sidedata(rev, sidedata_end)

    def rawdata(self, nodeorrev):
        """return the uncompressed raw data of a given node or revision number."""
        return self._revisiondata(nodeorrev, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if (
                    self._inner._revisioncache
                    and self._inner._revisioncache[0] == node
                ):
                    self._inner._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self.feature_config.censorable and storageutil.iscensoredtext(
                text
            ):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    @property
    def _split_index_file(self):
        """the path where to expect the index of an ongoing splitting operation

        The file will only exist if a splitting operation is in progress, but
        it is always expected at the same location."""
        parts = self.radix.split(b'/')
        if len(parts) > 1:
            # adds a '-s' suffix to the `data/` or `meta/` base
            head = parts[0] + b'-s'
            mids = parts[1:-1]
            tail = parts[-1] + b'.i'
            pieces = [head] + mids + [tail]
            return b'/'.join(pieces)
        else:
            # the revlog is stored at the root of the store (changelog or
            # manifest), no risk of collision.
            return self.radix + b'.i.s'

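    # For example (illustrative): a filelog with radix `data/foo` expects
    # its split index at `data-s/foo.i`, while a revlog stored at the store
    # root, such as the changelog with radix `00changelog`, expects it at
    # `00changelog.i.s`.
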
    def _enforceinlinesize(self, tr, side_write=True):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        if self._docket is not None:
            msg = b"inline revlog should not have a docket"
            raise error.ProgrammingError(msg)

        troffset = tr.findoffset(self._inner.canonical_index_file)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        if troffset:
            tr.addbackup(self._inner.canonical_index_file, for_offset=True)
        tr.add(self._datafile, 0)

        new_index_file_path = None
        if side_write:
            old_index_file_path = self._indexfile
            new_index_file_path = self._split_index_file
            opener = self.opener
            weak_self = weakref.ref(self)

            # the "split" index replaces the real index when the transaction
            # is finalized
            def finalize_callback(tr):
                opener.rename(
                    new_index_file_path,
                    old_index_file_path,
                    checkambig=True,
                )
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path
                    maybe_self._inner.index_file = maybe_self._indexfile

            def abort_callback(tr):
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path
                    maybe_self._inner.inline = True
                    maybe_self._inner.index_file = old_index_file_path

            tr.registertmp(new_index_file_path)
            if self.target[1] is not None:
                callback_id = b'000-revlog-split-%d-%s' % self.target
            else:
                callback_id = b'000-revlog-split-%d' % self.target[0]
            tr.addfinalize(callback_id, finalize_callback)
            tr.addabort(callback_id, abort_callback)

        self._format_flags &= ~FLAG_INLINE_DATA
        self._inner.split_inline(
            tr,
            self._format_flags | self._format_version,
            new_index_file_path=new_index_file_path,
        )

        self._inline = False
        if new_index_file_path is not None:
            self._indexfile = new_index_file_path

        nodemaputil.setup_persistent_nodemap(tr, self)

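    # The split is transactional: the non-inline index is first written to
    # the side file named by `_split_index_file`, then renamed over the real
    # index only when the transaction is finalized; on abort, the in-memory
    # state is rolled back to the inline layout.
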
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        with self._inner.reading():
            yield

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._inner.is_writing:
            yield
        else:
            data_end = None
            sidedata_end = None
            if self._docket is not None:
                data_end = self._docket.data_end
                sidedata_end = self._docket.sidedata_end
            with self._inner.writing(
                transaction,
                data_end=data_end,
                sidedata_end=sidedata_end,
            ):
                yield
                if self._docket is not None:
                    self._write_docket(transaction)

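    # `_writing` is effectively re-entrant: if the inner revlog is already
    # in a writing context the manager just yields, so nested uses (for
    # instance `addrawrevision` called while a surrounding `_writing`
    # context is already open) share a single writing session, and the
    # docket is only written by the outermost context.
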
    @property
    def is_delaying(self):
        return self._inner.is_delaying

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic.

        We could also imagine using the same transaction logic for all
        revlogs since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.feature_config.has_side_data:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

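    # A minimal usage sketch (hypothetical caller; `tr` is an open
    # transaction and `fl` a filelog-like revlog; not part of the original
    # source):
    #
    #   with repo.transaction(b'example') as tr:
    #       rev = fl.addrevision(text, tr, linkrev, p1node, p2node)
    #
    # If the node already exists, the existing revision number is returned
    # and nothing is written.
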
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents

        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        return self._inner.compress(data)

    def decompress(self, data):
        return self._inner.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._inner._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._inner._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self.delta_config.debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        if cachedelta is not None and len(cachedelta) == 2:
            # If the cached delta has no information about how it should be
            # reused, add the default reuse instruction according to the
            # revlog's configuration.
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse = DELTA_BASE_REUSE_NO
            cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.feature_config.has_side_data:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self._inner.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty; that way
            # we can easily detect empty sidedata, and it will be no
            # different from sidedata we add manually.
            sidedata_offset = 0

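        # The "rank" computed below is the size of the set made of a
        # revision and all its ancestors: a root has rank 1, a linear child
        # adds 1 to its parent's rank, and a merge starts from the
        # higher-ranked parent and adds the revisions only reachable from
        # the other parent.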
        rank = RANK_UNKNOWN
        if self.feature_config.compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._inner._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.

        Versions < 2 of the revlog can get this in O(1), while revlog v2
        needs a docket file to store that information: since sidedata can be
        rewritten to the end of the data file within a transaction, you can
        have cases where, for example, rev `n` does not have sidedata while
        rev `n - 1` does, leading to `n - 1`'s sidedata being written after
        `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self,
        transaction,
        entry,
        data,
        link,
        offset,
        sidedata,
        sidedata_offset,
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        index_end = data_end = sidedata_end = None
        if self._docket is not None:
            index_end = self._docket.index_end
            data_end = self._docket.data_end
            sidedata_end = self._docket.sidedata_end

        files_end = self._inner.write_entry(
            transaction,
            entry,
            data,
            link,
            offset,
            sidedata,
            sidedata_offset,
            index_end,
            data_end,
            sidedata_end,
        )
        self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = files_end[0]
            self._docket.data_end = files_end[1]
            self._docket.sidedata_end = files_end[2]

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first delta
        is against its parent, which should be in our log; the rest are
        against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self.delta_config.debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be a full replacement
                        # in a single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

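    # Each item consumed by `addgroup` above unpacks as an 8-tuple:
    #
    #   (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    #
    # `linkmapper(linknode)` translates the changeset node carried by the
    # group into a local linkrev, and the return value of `addgroup` is True
    # iff at least one revision (new or duplicate) was processed.
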
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self.feature_config.censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self.feature_config.censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

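    # A minimal usage sketch (hypothetical caller; ``rl`` and ``tr`` are
    # assumed to be an existing revlog and transaction): the second element
    # lists the revisions whose linkrevs the strip would break, which callers
    # typically back up and re-add after truncation.
    #
    #   striprev, brokenrevs = rl.getstrippoint(minlink)
    #   if striprev < len(rl):
    #       rl.strip(minlink, tr)
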
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However, it
            # is not powerful enough at the time of this comment.
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._chaininfocache = util.lrucachedict(500)
        self._inner.clear_cache()

        del self.index[rev:-1]

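    # Worked example of the truncation arithmetic above (illustrative numbers
    # only): with entry_size == 64 and a strip point of rev == 10, a
    # non-inline revlog truncates its index at 10 * 64 == 640 bytes and its
    # data file at self.start(10); an inline revlog interleaves index entries
    # and revision data in one file, so the cut-off is data_end + 10 * 64.
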
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

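    # A minimal interpretation sketch (hypothetical caller): positive values
    # indicate trailing garbage, negative values indicate truncated files.
    #
    #   dd, di = rl.checksize()
    #   if (dd, di) != (0, 0):
    #       ...  # report corruption, as verifyintegrity() does below
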
    def files(self):
        """return list of files that compose this revlog"""
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self.delta_config.general_delta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self._candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

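    # A minimal usage sketch (hypothetical caller): emit deltas for a set of
    # nodes in storage order, with full revision data attached.
    #
    #   for delta in rl.emitrevisions(
    #       nodes, nodesorder=b'storage', revisiondata=True
    #   ):
    #       ...  # each item is a revlogrevisiondelta
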
    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta
        encoding differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy
        can significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means
        if you are converting a non-generaldelta revlog to a generaldelta
        revlog, deltas will be recomputed if the delta's parent isn't a
        parent of the revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether deltas are forcibly computed against both
        parents for merges. When left to ``None``, the destination revlog's
        existing configuration is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazy_delta and lazy_delta_base control whether to reuse a cached
        # delta, if possible.
        old_delta_config = destrevlog.delta_config
        destrevlog.delta_config = destrevlog.delta_config.copy()

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog.delta_config.lazy_delta_base = True
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = False

            delta_both_parents = (
                forcedeltabothparents or old_delta_config.delta_both_parents
            )
            destrevlog.delta_config.delta_both_parents = delta_both_parents

            with self.reading(), destrevlog._writing(tr):
                self._clone(
                    tr,
                    destrevlog,
                    addrevisioncb,
                    deltareuse,
                    forcedeltabothparents,
                    sidedata_helpers,
                )

        finally:
            destrevlog.delta_config = old_delta_config

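    # A minimal usage sketch (hypothetical caller; ``src``, ``dest``, and
    # ``tr`` are assumed to exist, with ``dest`` empty): recompute every
    # delta while copying, e.g. after a change of delta algorithm.
    #
    #   src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)
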
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self.delta_config.debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog.delta_config.lazy_delta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._inner._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    try:
                        rawtext = self._revisiondata(rev)
                    except error.CensoredNodeError as censored:
                        assert flags & REVIDX_ISCENSORED
                        rawtext = censored.tombstone
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

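    # A minimal usage sketch (hypothetical caller): replace a revision's
    # content with a tombstone, dispatching on the revlog format as above.
    #
    #   rl.censorrevision(tr, censornode, tombstone=b'censored')
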
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                      | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()             | 0      | 0      | 0     | not 0
            #  renamed()           | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n'| False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #               | common | rename | meta  | ext
            #  -------------------------------------------------
            #  rawsize()    | L1     | L1     | L1    | L1
            #  size()       | L1     | L2-LM  | L1(*) | L1 (?)
            #  len(rawtext) | L2     | L2     | L2    | L2
            #  len(text)    | L2     | L2     | L2    | L3
            #  len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

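    # A minimal usage sketch (hypothetical caller; the state keys shown are
    # the ones read above):
    #
    #   state = {b'expectedversion': 1, b'erroroncensored': True}
    #   for problem in rl.verifyintegrity(state):
    #       ...  # each revlogproblem carries warning/error/node fields
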
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

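    # A minimal usage sketch (hypothetical caller): compare the logical
    # tracked size against the on-disk stored size to gauge how well the
    # deltas compress.
    #
    #   info = rl.storageinfo(trackedsize=True, storedsize=True)
    #   ratio = info[b'storedsize'] / max(1, info[b'trackedsize'])
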
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.feature_config.has_side_data:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._inner._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.feature_config.has_side_data:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self._inner.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
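
    # A minimal usage sketch (hypothetical caller; ``helpers`` is the
    # 3-tuple produced by revlogutil.sidedata.get_sidedata_helpers, whose
    # second and third items are checked above):
    #
    #   rl.rewrite_sidedata(tr, helpers, 0, len(rl) - 1)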