typing: add a few type hints to `mercurial/revlog.py`...
Matt Harbison
r52570:df6ce326 default
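This changeset imports `Optional` and `Tuple` from `typing` (used by annotations in this file) and annotates `_get_decompressor`, `compress`, and `decompress` on the `_InnerRevlog` class. As a standalone sketch of the annotation style (toy functions, not part of the diff):

    from typing import Optional, Tuple

    def compress(data: bytes) -> Tuple[bytes, bytes]:
        # (header, payload): an empty header means the payload is already
        # self-describing; b'u' marks literal uncompressed text
        return (b'', data) if data[:1] == b'\0' else (b'u', data)

    def lookup(key: bytes) -> Optional[bytes]:
        # Optional[bytes] marks a value that may be None
        return None
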
@@ -1,4091 +1,4098 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import functools
import io
import os
import struct
import weakref
import zlib

+from typing import (
+    Optional,
+    Tuple,
+)
+
# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072


# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if hasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_rust(data, inline, default_header):
    cache = (0, data) if inline else None
    return rustrevlog.Index(data, default_header), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'


class _Config:
    def copy(self):
        return self.__class__(**self.__dict__)


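`_Config.copy()` rebuilds an instance of the concrete subclass from its attribute dict. A minimal standalone sketch of the idiom (toy class name, not Mercurial code):

    class Config:
        def __init__(self, retries=3, verbose=False):
            self.retries = retries
            self.verbose = verbose

        def copy(self):
            # Re-invoke __init__ on the concrete class with the current
            # attribute values; subclasses get the right type for free.
            return self.__class__(**self.__dict__)

    base = Config(retries=5)
    dup = base.copy()
    assert dup.retries == 5 and dup is not base
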
@attr.s()
class FeatureConfig(_Config):
    """Hold configuration values about the available revlog features"""

    # the default compression engine
    compression_engine = attr.ib(default=b'zlib')
    # compression engines options
    compression_engine_options = attr.ib(default=attr.Factory(dict))

    # can we use censor on this revlog
    censorable = attr.ib(default=False)
    # does this revlog use the "side data" feature
    has_side_data = attr.ib(default=False)
    # might remove rank configuration once the computation has no impact
    compute_rank = attr.ib(default=False)
    # parent order is supposed to be semantically irrelevant, so we
    # normally resort parents to ensure that the first parent is non-null,
    # if there is a non-null parent at all.
    # filelog abuses the parent order as a flag to mark some instances of
    # meta-encoded files, so allow it to disable this behavior.
    canonical_parent_order = attr.ib(default=False)
    # can ellipsis commit be used
    enable_ellipsis = attr.ib(default=False)

    def copy(self):
        new = super().copy()
        new.compression_engine_options = self.compression_engine_options.copy()
        return new


@attr.s()
class DataConfig(_Config):
    """Hold configuration values about how the revlog data are read"""

    # should we try to open the "pending" version of the revlog
    try_pending = attr.ib(default=False)
    # should we try to open the "split" version of the revlog
    try_split = attr.ib(default=False)
    # When True, indexfile should be opened with checkambig=True at writing,
    # to avoid file stat ambiguity.
    check_ambig = attr.ib(default=False)

    # If true, use mmap instead of reading to deal with large index
    mmap_large_index = attr.ib(default=False)
    # how much data is large
    mmap_index_threshold = attr.ib(default=None)
    # How much data to read and cache into the raw revlog data cache.
    chunk_cache_size = attr.ib(default=65536)

    # The size of the uncompressed cache compared to the largest revision seen.
    uncompressed_cache_factor = attr.ib(default=None)

    # The number of chunks cached
    uncompressed_cache_count = attr.ib(default=None)

    # Allow sparse reading of the revlog data
    with_sparse_read = attr.ib(default=False)
    # minimal density of a sparse read chunk
    sr_density_threshold = attr.ib(default=0.50)
    # minimal size of data we skip when performing sparse read
    sr_min_gap_size = attr.ib(default=262144)

    # are deltas encoded against arbitrary bases.
    generaldelta = attr.ib(default=False)


@attr.s()
class DeltaConfig(_Config):
    """Hold configuration values about how new deltas are computed

    Some attributes are duplicated from DataConfig to help keep each object
    self-contained.
    """

    # can delta be encoded against arbitrary bases.
    general_delta = attr.ib(default=False)
    # Allow sparse writing of the revlog data
    sparse_revlog = attr.ib(default=False)
    # maximum length of a delta chain
    max_chain_len = attr.ib(default=None)
    # Maximum distance between delta chain base start and end
    max_deltachain_span = attr.ib(default=-1)
    # If `upper_bound_comp` is not None, this is the expected maximal gain from
    # compression for the data content.
    upper_bound_comp = attr.ib(default=None)
    # Should we try a delta against both parents
    delta_both_parents = attr.ib(default=True)
    # Test delta base candidate groups by chunk of this maximal size.
    candidate_group_chunk_size = attr.ib(default=0)
    # Should we display debug information about delta computation
    debug_delta = attr.ib(default=False)
    # trust incoming deltas by default
    lazy_delta = attr.ib(default=True)
    # trust the base of incoming deltas by default
    lazy_delta_base = attr.ib(default=False)


class _InnerRevlog:
    """An inner layer of the revlog object

    That layer exists to be able to delegate some operations to Rust; its
    boundaries are arbitrary and based on what we can delegate to Rust.
    """

    def __init__(
        self,
        opener,
        index,
        index_file,
        data_file,
        sidedata_file,
        inline,
        data_config,
        delta_config,
        feature_config,
        chunk_cache,
        default_compression_header,
    ):
        self.opener = opener
        self.index = index

        self.index_file = index_file
        self.data_file = data_file
        self.sidedata_file = sidedata_file
        self.inline = inline
        self.data_config = data_config
        self.delta_config = delta_config
        self.feature_config = feature_config

        # used during diverted write.
        self._orig_index_file = None

        self._default_compression_header = default_compression_header

        # index

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self.index_file if self.inline else self.data_file),
            self.data_config.chunk_cache_size,
            chunk_cache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self.sidedata_file,
            self.data_config.chunk_cache_size,
        )

        # revlog header -> revlog compressor
        self._decompressors = {}
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None

        # cache some uncompressed chunks
        # rev → uncompressed_chunk
        #
        # the max cost is dynamically updated to be proportional to the
        # size of revisions we actually encounter.
        self._uncompressed_chunk_cache = None
        if self.data_config.uncompressed_cache_factor is not None:
            self._uncompressed_chunk_cache = util.lrucachedict(
                self.data_config.uncompressed_cache_count,
                maxcost=65536,  # some arbitrary initial value
            )

        self._delay_buffer = None

    def __len__(self):
        return len(self.index)

    def clear_cache(self):
        assert not self.is_delaying
        self._revisioncache = None
        if self._uncompressed_chunk_cache is not None:
            self._uncompressed_chunk_cache.clear()
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

    @property
    def canonical_index_file(self):
        if self._orig_index_file is not None:
            return self._orig_index_file
        return self.index_file

    @property
    def is_delaying(self):
        """is the revlog currently delaying the visibility of written data?

        The delaying mechanism can be either in-memory or written on disk in a
        side-file."""
        return (self._delay_buffer is not None) or (
            self._orig_index_file is not None
        )

    # Derived from index values.

    def start(self, rev):
        """the offset of the data chunk for this revision"""
        return int(self.index[rev][0] >> 16)

    def length(self, rev):
        """the length of the data chunk for this revision"""
        return self.index[rev][1]

    def end(self, rev):
        """the end of the data chunk for this revision"""
        return self.start(rev) + self.length(rev)

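`start()` relies on the revlog index packing the data offset and the revision flags into one integer, with the offset in the high bits. A hedged sketch of that packing (illustrative values only):

    # offset_flags = (offset << 16) | flags, as unpacked by start()
    offset_flags = (1024 << 16) | 0x0001  # offset 1024, one flag bit set
    offset = offset_flags >> 16           # what start() computes -> 1024
    flags = offset_flags & 0xFFFF         # the low 16 bits hold the flags
    assert (offset, flags) == (1024, 1)
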
    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self.delta_config.general_delta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self.delta_config.sparse_revlog:
            return self.deltaparent(rev) == nullrev
        elif hasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        while self.length(p1) == 0:
            b = self.deltaparent(p1)
            if b == p1:
                break
            p1 = b
        p2 = entry[6]
        while self.length(p2) == 0:
            b = self.deltaparent(p2)
            if b == p2:
                break
            p2 = b
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        generaldelta = self.delta_config.general_delta
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

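To make the `_deltachain` walk concrete, here is a toy model (an invented base map, not the revlog index API): follow each revision's delta base until a revision is its own base, then reverse so the chain runs from the base to the requested revision:

    toy_base = {0: 0, 1: 0, 2: 1}  # rev -> delta base; rev 0 is a snapshot

    def toy_deltachain(rev):
        chain = []
        while toy_base[rev] != rev:
            chain.append(rev)
            rev = toy_base[rev]
        chain.append(rev)   # include the chain base itself
        chain.reverse()     # ascending order, like _deltachain
        return chain

    assert toy_deltachain(2) == [0, 1, 2]
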
    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self.feature_config.compression_engine]
        return engine.revlogcompressor(
            self.feature_config.compression_engine_options
        )

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._default_compression_header is None:
            return None
        t = self._default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

-    def _get_decompressor(self, t):
+    def _get_decompressor(self, t: bytes):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(
                    self.feature_config.compression_engine_options
                )
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

-    def compress(self, data):
+    def compress(self, data: bytes) -> Tuple[bytes, bytes]:
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

-    def decompress(self, data):
+    def decompress(self, data: bytes):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

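The one-byte headers that `compress` emits and `decompress` dispatches on can be exercised in isolation. A self-contained sketch of the convention (standalone toy functions, not the revlog methods):

    import zlib

    def toy_decompress(data: bytes) -> bytes:
        t = data[0:1]
        if t == b'x':       # zlib streams start with 0x78 ('x')
            return zlib.decompress(data)
        if t == b'\0':      # raw data stored verbatim
            return data
        if t == b'u':       # uncompressed, marker prepended
            return data[1:]
        raise ValueError('unknown compression header %r' % t)

    assert toy_decompress(b'uplain text') == b'plain text'
    assert toy_decompress(zlib.compress(b'plain text')) == b'plain text'
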
    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        if len(self.index) == 0:
            yield  # nothing to be read
        elif self._delay_buffer is not None and self.inline:
            msg = "revlog with delayed write should not be inline"
            raise error.ProgrammingError(msg)
        else:
            with self._segmentfile.reading():
                with self._segmentfile_sidedata.reading():
                    yield

    @property
    def is_writing(self):
        """True if a writing context is open"""
        return self._writinghandles is not None

    @property
    def is_open(self):
        """True if any file handle is being held

        Used for assert and debug in the python code"""
        return self._segmentfile.is_open or self._segmentfile_sidedata.is_open

    @contextlib.contextmanager
    def writing(self, transaction, data_end=None, sidedata_end=None):
        """Open the revlog files for writing

        Adding content to a revlog should be done within such a context.
        """
        if self.is_writing:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self.index)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self.inline:
                    try:
                        dfh = self.opener(self.data_file, mode=b"r+")
                        if data_end is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self.opener(self.data_file, mode=b"w+")
                    transaction.add(self.data_file, dsize)
                if self.sidedata_file is not None:
                    assert sidedata_end is not None
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self.sidedata_file, mode=b"r+")
                        dfh.seek(sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self.sidedata_file, mode=b"w+")
                    transaction.add(self.sidedata_file, sidedata_end)

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self.inline:
                    transaction.add(self.index_file, dsize + isize)
                else:
                    transaction.add(self.index_file, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self.inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing references to
                # potential unflushed data content.
                if ifh is not None:
                    ifh.close()

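`writing()` is re-entrant: when handles are already open it simply yields, so nested calls share the outer context. A toy illustration of that pattern (stand-in class, not the revlog API):

    import contextlib

    class ToyWriter:
        def __init__(self):
            self._handles = None

        @property
        def is_writing(self):
            return self._handles is not None

        @contextlib.contextmanager
        def writing(self):
            if self.is_writing:
                yield  # nested call: reuse the already-open handles
            else:
                self._handles = object()  # stand-in for real file handles
                try:
                    yield
                finally:
                    self._handles = None

    w = ToyWriter()
    with w.writing():
        with w.writing():  # no-op, shares the outer handles
            assert w.is_writing
    assert not w.is_writing
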
    def __index_write_fp(self, index_end=None):
        """internal method to open the index file for writing

        You should not use this directly and use `_writing` instead
        """
        try:
            if self._delay_buffer is None:
                f = self.opener(
                    self.index_file,
                    mode=b"r+",
                    checkambig=self.data_config.check_ambig,
                )
            else:
                # check_ambig affects the way we open the file for writing;
                # however, here we do not actually open a file for writing, as
                # writes will be appended to a delay_buffer. So check_ambig is
                # not meaningful and is unneeded here.
                f = randomaccessfile.appender(
                    self.opener, self.index_file, b"r+", self._delay_buffer
                )
            if index_end is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(index_end, os.SEEK_SET)
            return f
        except FileNotFoundError:
            if self._delay_buffer is None:
                return self.opener(
                    self.index_file,
                    mode=b"w+",
                    checkambig=self.data_config.check_ambig,
                )
            else:
                return randomaccessfile.appender(
                    self.opener, self.index_file, b"w+", self._delay_buffer
                )

    def __index_new_fp(self):
        """internal method to create a new index file for writing

        You should not use this unless you are upgrading from an inline revlog
        """
        return self.opener(
            self.index_file,
            mode=b"w",
            checkambig=self.data_config.check_ambig,
        )

    def split_inline(self, tr, header, new_index_file_path=None):
        """split the data of an inline revlog into an index and a data file"""
        assert self._delay_buffer is None
        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with sidedata writing handle as it is only
            # relevant with revlog-v2 which is never inline, not reaching
            # this code

        new_dfh = self.opener(self.data_file, mode=b"w+")
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self.reading():
                for r in range(len(self.index)):
                    new_dfh.write(self.get_segment_for_revs(r, r)[1])
                new_dfh.flush()

            if new_index_file_path is not None:
                self.index_file = new_index_file_path
            with self.__index_new_fp() as fp:
                self.inline = False
                for i in range(len(self.index)):
                    e = self.index.entry_binary(i)
                    if i == 0:
                        packed_header = self.index.pack_header(header)
                        e = packed_header + e
                    fp.write(e)

                # If we don't use side-write, the temp file replaces the real
                # index when we exit the context manager

            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self.data_file,
                self.data_config.chunk_cache_size,
            )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with sidedata writing handle as it is only
                # relevant with revlog-v2 which is never inline, not reaching
                # this code
        finally:
            if new_dfh is not None:
                new_dfh.close()
        return self.index_file

    def get_segment_for_revs(self, startrev, endrev):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.

        API: we should consider making this a private part of the InnerRevlog
        at some point.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self.inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length)

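For inline revlogs, `get_segment_for_revs` must skip the index entries interleaved with the data: `(rev + 1)` entries precede revision `rev`'s data. A small numeric sketch (invented sizes, for illustration only):

    entry_size = 64    # assumed index entry size
    rev = 3
    data_offset = 500  # offset among data bytes alone, from the index

    # physical offset in the combined inline file: the data offset plus the
    # (rev + 1) index entries that precede this revision's data
    physical = data_offset + (rev + 1) * entry_size
    assert physical == 756
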
    def _chunk(self, rev):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        if self._uncompressed_chunk_cache is not None:
            uncomp = self._uncompressed_chunk_cache.get(rev)
            if uncomp is not None:
                return uncomp

        compression_mode = self.index[rev][10]
        data = self.get_segment_for_revs(rev, rev)[1]
        if compression_mode == COMP_MODE_PLAIN:
            uncomp = data
        elif compression_mode == COMP_MODE_DEFAULT:
            uncomp = self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            uncomp = self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)
        if self._uncompressed_chunk_cache is not None:
            self._uncompressed_chunk_cache.insert(rev, uncomp, cost=len(uncomp))
        return uncomp

904 def _chunks(self, revs, targetsize=None):
909 def _chunks(self, revs, targetsize=None):
905 """Obtain decompressed chunks for the specified revisions.
910 """Obtain decompressed chunks for the specified revisions.
906
911
907 Accepts an iterable of numeric revisions that are assumed to be in
912 Accepts an iterable of numeric revisions that are assumed to be in
908 ascending order. Also accepts an optional already-open file handle
913 ascending order. Also accepts an optional already-open file handle
909 to be used for reading. If used, the seek position of the file will
914 to be used for reading. If used, the seek position of the file will
910 not be preserved.
915 not be preserved.
911
916
912 This function is similar to calling ``self._chunk()`` multiple times,
917 This function is similar to calling ``self._chunk()`` multiple times,
913 but is faster.
918 but is faster.
914
919
915 Returns a list with decompressed data for each requested revision.
920 Returns a list with decompressed data for each requested revision.
916 """
921 """
917 if not revs:
922 if not revs:
918 return []
923 return []
919 start = self.start
924 start = self.start
920 length = self.length
925 length = self.length
921 inline = self.inline
926 inline = self.inline
922 iosize = self.index.entry_size
927 iosize = self.index.entry_size
923 buffer = util.buffer
928 buffer = util.buffer
924
929
925 fetched_revs = []
930 fetched_revs = []
926 fadd = fetched_revs.append
931 fadd = fetched_revs.append
927
932
928 chunks = []
933 chunks = []
929 ladd = chunks.append
934 ladd = chunks.append
930
935
931 if self._uncompressed_chunk_cache is None:
936 if self._uncompressed_chunk_cache is None:
932 fetched_revs = revs
937 fetched_revs = revs
933 else:
938 else:
934 for rev in revs:
939 for rev in revs:
935 cached_value = self._uncompressed_chunk_cache.get(rev)
940 cached_value = self._uncompressed_chunk_cache.get(rev)
936 if cached_value is None:
941 if cached_value is None:
937 fadd(rev)
942 fadd(rev)
938 else:
943 else:
939 ladd((rev, cached_value))
944 ladd((rev, cached_value))
940
945
941 if not fetched_revs:
946 if not fetched_revs:
942 slicedchunks = ()
947 slicedchunks = ()
943 elif not self.data_config.with_sparse_read:
948 elif not self.data_config.with_sparse_read:
944 slicedchunks = (fetched_revs,)
949 slicedchunks = (fetched_revs,)
945 else:
950 else:
946 slicedchunks = deltautil.slicechunk(
951 slicedchunks = deltautil.slicechunk(
947 self,
952 self,
948 fetched_revs,
953 fetched_revs,
949 targetsize=targetsize,
954 targetsize=targetsize,
950 )
955 )
951
956
952 for revschunk in slicedchunks:
957 for revschunk in slicedchunks:
953 firstrev = revschunk[0]
958 firstrev = revschunk[0]
954 # Skip trailing revisions with empty diff
959 # Skip trailing revisions with empty diff
955 for lastrev in revschunk[::-1]:
960 for lastrev in revschunk[::-1]:
956 if length(lastrev) != 0:
961 if length(lastrev) != 0:
957 break
962 break
958
963
959 try:
964 try:
960 offset, data = self.get_segment_for_revs(firstrev, lastrev)
965 offset, data = self.get_segment_for_revs(firstrev, lastrev)
961 except OverflowError:
966 except OverflowError:
962 # issue4215 - we can't cache a run of chunks greater than
967 # issue4215 - we can't cache a run of chunks greater than
963 # 2G on Windows
968 # 2G on Windows
964 for rev in revschunk:
969 for rev in revschunk:
965 ladd((rev, self._chunk(rev)))
970 ladd((rev, self._chunk(rev)))
966
971
967 decomp = self.decompress
972 decomp = self.decompress
968 # self._decompressor might be None, but will not be used in that case
973 # self._decompressor might be None, but will not be used in that case
969 def_decomp = self._decompressor
974 def_decomp = self._decompressor
970 for rev in revschunk:
975 for rev in revschunk:
971 chunkstart = start(rev)
976 chunkstart = start(rev)
972 if inline:
977 if inline:
973 chunkstart += (rev + 1) * iosize
978 chunkstart += (rev + 1) * iosize
974 chunklength = length(rev)
979 chunklength = length(rev)
975 comp_mode = self.index[rev][10]
980 comp_mode = self.index[rev][10]
976 c = buffer(data, chunkstart - offset, chunklength)
981 c = buffer(data, chunkstart - offset, chunklength)
977 if comp_mode == COMP_MODE_PLAIN:
982 if comp_mode == COMP_MODE_PLAIN:
978 c = c
983 c = c
979 elif comp_mode == COMP_MODE_INLINE:
984 elif comp_mode == COMP_MODE_INLINE:
980 c = decomp(c)
985 c = decomp(c)
981 elif comp_mode == COMP_MODE_DEFAULT:
986 elif comp_mode == COMP_MODE_DEFAULT:
982 c = def_decomp(c)
987 c = def_decomp(c)
983 else:
988 else:
984 msg = b'unknown compression mode %d'
989 msg = b'unknown compression mode %d'
985 msg %= comp_mode
990 msg %= comp_mode
986 raise error.RevlogError(msg)
991 raise error.RevlogError(msg)
987 ladd((rev, c))
992 ladd((rev, c))
988 if self._uncompressed_chunk_cache is not None:
993 if self._uncompressed_chunk_cache is not None:
989 self._uncompressed_chunk_cache.insert(rev, c, len(c))
994 self._uncompressed_chunk_cache.insert(rev, c, len(c))
990
995
991 chunks.sort()
996 chunks.sort()
992 return [x[1] for x in chunks]
997 return [x[1] for x in chunks]
993
998
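    # Sketch of the pattern above (hedged; `cache`, `fetch`, and `wanted` are
    # hypothetical stand-ins, not names from this module): revisions found in
    # the uncompressed-chunk cache are collected immediately, only the misses
    # are fetched from disk, and sorting the (rev, chunk) pairs restores the
    # caller's ascending order:
    #
    #   chunks = [(r, cache[r]) for r in wanted if r in cache]
    #   chunks += [(r, fetch(r)) for r in wanted if r not in cache]
    #   chunks.sort()
    #   result = [chunk for _rev, chunk in chunks]
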
    def raw_text(self, node, rev):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            cachedrev = self._revisioncache[1]

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._inner._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        if self._uncompressed_chunk_cache is not None:
            # dynamically update the uncompressed_chunk_cache size to the
            # largest revision we saw in this revlog.
            factor = self.data_config.uncompressed_cache_factor
            candidate_size = rawsize * factor
            if candidate_size > self._uncompressed_chunk_cache.maxcost:
                self._uncompressed_chunk_cache.maxcost = candidate_size

        bins = self._chunks(chain, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

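    # Reconstruction sketch (hedged; `inner` names a hypothetical
    # _InnerRevlog instance used only for illustration). raw_text() above
    # boils down to replaying a delta chain on top of its base text:
    #
    #   chain, _stopped = inner._deltachain(rev)
    #   bins = inner._chunks(chain)
    #   rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
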
    def sidedata(self, rev, sidedata_end):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self.inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if sidedata_end < sidedata_offset + sidedata_size:
            filename = self.sidedata_file
            end = sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def write_entry(
        self,
        transaction,
        entry,
        data,
        link,
        offset,
        sidedata,
        sidedata_offset,
        index_end,
        data_end,
        sidedata_end,
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if index_end is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(index_end, os.SEEK_SET)
        if dfh:
            if data_end is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(sidedata_end, os.SEEK_SET)

        curr = len(self.index) - 1
        if not self.inline:
            transaction.add(self.data_file, offset)
            if self.sidedata_file:
                transaction.add(self.sidedata_file, sidedata_offset)
            transaction.add(self.canonical_index_file, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            if self._delay_buffer is None:
                ifh.write(entry)
            else:
                self._delay_buffer.append(entry)
        elif self._delay_buffer is not None:
            msg = b'invalid delayed write on inline revlog'
            raise error.ProgrammingError(msg)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self.canonical_index_file, offset)
            assert not sidedata
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
        return (
            ifh.tell(),
            dfh.tell() if dfh else None,
            sdfh.tell() if sdfh else None,
        )

    def _divert_index(self):
        index_file = self.index_file
        # when we encounter a legacy inline-changelog, split it. However it is
        # important to use the expected filename for pending content
        # (<radix>.a) otherwise hooks won't see the content of the
        # pending transaction.
        if index_file.endswith(b'.s'):
            index_file = self.index_file[:-2]
        return index_file + b'.a'
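
    # Worked example (hypothetical file names, for illustration only): for a
    # changelog whose index is b'00changelog.i', _divert_index() returns
    # b'00changelog.i.a'; for a split legacy inline changelog stored as
    # b'00changelog.i.s', the b'.s' suffix is stripped first, which again
    # yields b'00changelog.i.a' as the pending file name.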

    def delay(self):
        assert not self.is_open
        if self.inline:
            msg = "revlog with delayed write should not be inline"
            raise error.ProgrammingError(msg)
        if self._delay_buffer is not None or self._orig_index_file is not None:
            # delay or divert already in place
            return None
        elif len(self.index) == 0:
            self._orig_index_file = self.index_file
            self.index_file = self._divert_index()
            assert self._orig_index_file is not None
            assert self.index_file is not None
            if self.opener.exists(self.index_file):
                self.opener.unlink(self.index_file)
            return self.index_file
        else:
            self._delay_buffer = []
            return None

    def write_pending(self):
        assert not self.is_open
        if self.inline:
            msg = "revlog with delayed write should not be inline"
            raise error.ProgrammingError(msg)
        if self._orig_index_file is not None:
            return None, True
        any_pending = False
        pending_index_file = self._divert_index()
        if self.opener.exists(pending_index_file):
            self.opener.unlink(pending_index_file)
        util.copyfile(
            self.opener.join(self.index_file),
            self.opener.join(pending_index_file),
        )
        if self._delay_buffer:
            with self.opener(pending_index_file, b'r+') as ifh:
                ifh.seek(0, os.SEEK_END)
                ifh.write(b"".join(self._delay_buffer))
            any_pending = True
        self._delay_buffer = None
        self._orig_index_file = self.index_file
        self.index_file = pending_index_file
        return self.index_file, any_pending

    def finalize_pending(self):
        assert not self.is_open
        if self.inline:
            msg = "revlog with delayed write should not be inline"
            raise error.ProgrammingError(msg)

        delay = self._delay_buffer is not None
        divert = self._orig_index_file is not None

        if delay and divert:
            assert False, "unreachable"
        elif delay:
            if self._delay_buffer:
                with self.opener(self.index_file, b'r+') as ifh:
                    ifh.seek(0, os.SEEK_END)
                    ifh.write(b"".join(self._delay_buffer))
            self._delay_buffer = None
        elif divert:
            if self.opener.exists(self.index_file):
                self.opener.rename(
                    self.index_file,
                    self._orig_index_file,
                    checkambig=True,
                )
            self.index_file = self._orig_index_file
            self._orig_index_file = None
        else:
            msg = b"neither delay nor divert found on this revlog"
            raise error.ProgrammingError(msg)
        return self.canonical_index_file
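
    # Lifecycle sketch (hedged; `inner` names a hypothetical _InnerRevlog and
    # is used only for illustration). The delayed-write protocol implemented
    # by the three methods above is:
    #
    #   inner.delay()             # start buffering index entries, or divert
    #                             # a brand-new revlog to its pending file
    #   inner.write_pending()     # copy the index and append the buffered
    #                             # entries so hooks can see pending data
    #   inner.finalize_pending()  # flush the buffer, or rename the diverted
    #                             # file back into place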


class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

    _flagserrorclass = error.RevlogError

    @staticmethod
    def is_inline_index(header_bytes):
        """Determine if a revlog is inline from the initial bytes of the index"""
        if len(header_bytes) == 0:
            return True

        header = INDEX_HEADER.unpack(header_bytes)[0]

        _format_flags = header & ~0xFFFF
        _format_version = header & 0xFFFF

        features = FEATURES_BY_VERSION[_format_version]
        return features[b'inline'](_format_flags)

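    # Worked example (assumed values): for a version 1 inline revlog the
    # first four index bytes unpack to header = 0x00010001. Masking with
    # ~0xFFFF keeps the flag bits (0x00010000, i.e. FLAG_INLINE_DATA) and
    # masking with 0xFFFF keeps the format version (0x0001, i.e. REVLOGV1).
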
    _docket_file: Optional[bytes]

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        try_split=False,
        canonical_parent_order=True,
        data_config=None,
        delta_config=None,
        feature_config=None,
        may_inline=True,  # may inline new revlog
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must be reliably set by normal
        code, but test, debug, or performance measurement code might not
        set this to an accurate value.
        """

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self._try_split = try_split
        self._may_inline = may_inline
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        if feature_config is not None:
            self.feature_config = feature_config.copy()
        elif b'feature-config' in self.opener.options:
            self.feature_config = self.opener.options[b'feature-config'].copy()
        else:
            self.feature_config = FeatureConfig()
        self.feature_config.censorable = censorable
        self.feature_config.canonical_parent_order = canonical_parent_order
        if data_config is not None:
            self.data_config = data_config.copy()
        elif b'data-config' in self.opener.options:
            self.data_config = self.opener.options[b'data-config'].copy()
        else:
            self.data_config = DataConfig()
        self.data_config.check_ambig = checkambig
        self.data_config.mmap_large_index = mmaplargeindex
        if delta_config is not None:
            self.delta_config = delta_config.copy()
        elif b'delta-config' in self.opener.options:
            self.delta_config = self.opener.options[b'delta-config'].copy()
        else:
            self.delta_config = DeltaConfig()
        self.delta_config.upper_bound_comp = upperboundcomp

        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)

        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}

        # other optional features

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)
        # prevent nesting of addgroup
        self._adding_group = None

        chunk_cache = self._loadindex()
        self._load_inner(chunk_cache)
        self._concurrencychecker = concurrencychecker

    def _init_opts(self):
        """process options (from above/config) to set up the associated
        default revlog mode

        These values might be affected when actually reading on-disk
        information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
            compute_rank = opts.get(b'changelogv2.compute-rank', True)
            self.feature_config.compute_rank = compute_rank
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1
            if self._may_inline:
                new_header |= FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        mmapindexthreshold = None
        if self.data_config.mmap_large_index:
            mmapindexthreshold = self.data_config.mmap_index_threshold
        if self.feature_config.enable_ellipsis:
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        chunk_cache_size = self.data_config.chunk_cache_size
        if chunk_cache_size <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % chunk_cache_size
            )
        elif chunk_cache_size & (chunk_cache_size - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % chunk_cache_size
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

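    # Aside (a general bit trick, not specific to this module): the check
    # `x & (x - 1)` above is zero exactly when x is a power of two, e.g.
    #
    #   65536 & 65535 == 0        # 0x10000 & 0x0FFFF -> power of two, accepted
    #   65537 & 65536 == 65536    # non-zero -> rejected
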
    def _get_data(self, filepath, mmap_threshold, size=None):
        """return the file content, with or without mmap

        If the file is missing, return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if (
                        file_size >= mmap_threshold
                        and self.opener.is_mmap_safe(filepath)
                    ):
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''

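    # Decision sketch (hedged summary of the method above): mmap is used only
    # when a threshold is configured, the file is at least that large, and
    # the opener reports the file as mmap-safe; everything else falls back to
    # a plain read into memory. A hypothetical call, where `rl` is a revlog:
    #
    #   entry_data = rl._get_data(b'00changelog.i', 1024 * 1024)
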
    def get_streams(self, max_linkrev, force_inline=False):
        """return a list of streams that represent this revlog

        This is used by stream-clone to do bytes-to-bytes copies of a
        repository.

        This streams data for all revisions that refer to a changelog revision
        up to `max_linkrev`.

        If `force_inline` is set, it enforces that the stream will represent
        an inline revlog.

        It returns a list of three-tuples:

            [
                (filename, bytes_stream, stream_size),
                …
            ]
        """
        n = len(self)
        index = self.index
        while n > 0:
            linkrev = index[n - 1][4]
            if linkrev < max_linkrev:
                break
            # note: this loop will rarely go through multiple iterations, since
            # it only traverses commits created during the current streaming
            # pull operation.
            #
            # If this becomes a problem, using a binary search should cap the
            # runtime of this.
            n = n - 1
        if n == 0:
            # no data to send
            return []
        index_size = n * index.entry_size
        data_size = self.end(n - 1)

        # XXX we might have been split (or stripped) since the object
        # initialization. We need to close this race too, perhaps by
        # pre-opening the files we feed to the revlog and never closing them
        # before we are done streaming.

        if self._inline:

            def get_stream():
                with self.opener(self._indexfile, mode=b"r") as fp:
                    yield None
                    size = index_size + data_size
                    if size <= 65536:
                        yield fp.read(size)
                    else:
                        yield from util.filechunkiter(fp, limit=size)

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        elif force_inline:

            def get_stream():
                with self.reading():
                    yield None

                    for rev in range(n):
                        idx = self.index.entry_binary(rev)
                        if rev == 0 and self._docket is None:
                            # re-inject the inline flag
                            header = self._format_flags
                            header |= self._format_version
                            header |= FLAG_INLINE_DATA
                            header = self.index.pack_header(header)
                            idx = header + idx
                        yield idx
                        yield self._inner.get_segment_for_revs(rev, rev)[1]

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        else:

            def get_index_stream():
                with self.opener(self._indexfile, mode=b"r") as fp:
                    yield None
                    if index_size <= 65536:
                        yield fp.read(index_size)
                    else:
                        yield from util.filechunkiter(fp, limit=index_size)

            def get_data_stream():
                with self._datafp() as fp:
                    yield None
                    if data_size <= 65536:
                        yield fp.read(data_size)
                    else:
                        yield from util.filechunkiter(fp, limit=data_size)

            index_stream = get_index_stream()
            next(index_stream)
            data_stream = get_data_stream()
            next(data_stream)
            return [
                (self._datafile, data_stream, data_size),
                (self._indexfile, index_stream, index_size),
            ]

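    # Consumption sketch (hedged; `rl` and `dest_vfs` are hypothetical names).
    # Each returned generator has already been primed past its `yield None`
    # and then produces raw bytes chunks totalling exactly `stream_size`:
    #
    #   for filename, stream, stream_size in rl.get_streams(max_linkrev):
    #       with dest_vfs(filename, b'wb') as fp:
    #           for chunk in stream:
    #               fp.write(chunk)
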
    def _loadindex(self, docket=None):
        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        elif self._try_split and self.opener.exists(self._split_index_file):
            entry_point = self._split_index_file
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self.delta_config.general_delta = features[b'generaldelta'](
                self._format_flags
            )
            self.feature_config.has_side_data = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self.delta_config.general_delta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self.delta_config.general_delta:
            self.delta_config.sparse_revlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None and self._nodemap_file is not None:
            # we would like to use the rust_index in all cases, especially
            # because it is necessary for AncestorsIterator and LazyAncestors
            # since the 6.7 cycle.
            #
            # However, the performance impact of unconditionally building the
            # nodemap is currently a problem for non-persistent nodemap
            # repositories.
            use_rust_index = True

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = functools.partial(
                parse_index_v1_rust, default_header=new_header
            )
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and hasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)

        return chunkcache

    def _load_inner(self, chunk_cache):
        if self._docket is None:
            default_compression_header = None
        else:
            default_compression_header = self._docket.default_compression_header

        self._inner = _InnerRevlog(
            opener=self.opener,
            index=self.index,
            index_file=self._indexfile,
            data_file=self._datafile,
            sidedata_file=self._sidedatafile,
            inline=self._inline,
            data_config=self.data_config,
            delta_config=self.delta_config,
            feature_config=self.feature_config,
            chunk_cache=chunk_cache,
            default_compression_header=default_compression_header,
        )

    def get_revlog(self):
        """simple function to mirror the API of other not-really-revlog classes"""
        return self

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public-facing "ID" of the revlog that we use in messages"""
        if self.revlog_kind == KIND_FILELOG:
            # Reference the file without the "data/" prefix, so it is familiar
            # to the user.
            return self.target[1]
        else:
            return self.radix

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def _candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

1824 def update_caches(self, transaction):
1831 def update_caches(self, transaction):
1825 """update on disk cache
1832 """update on disk cache
1826
1833
1827 If a transaction is passed, the update may be delayed to transaction
1834 If a transaction is passed, the update may be delayed to transaction
1828 commit."""
1835 commit."""
1829 if self._nodemap_file is not None:
1836 if self._nodemap_file is not None:
1830 if transaction is None:
1837 if transaction is None:
1831 nodemaputil.update_persistent_nodemap(self)
1838 nodemaputil.update_persistent_nodemap(self)
1832 else:
1839 else:
1833 nodemaputil.setup_persistent_nodemap(transaction, self)
1840 nodemaputil.setup_persistent_nodemap(transaction, self)
1834
1841
1835 def clearcaches(self):
1842 def clearcaches(self):
1836 """Clear in-memory caches"""
1843 """Clear in-memory caches"""
1837 self._chainbasecache.clear()
1844 self._chainbasecache.clear()
1838 self._inner.clear_cache()
1845 self._inner.clear_cache()
1839 self._pcache = {}
1846 self._pcache = {}
1840 self._nodemap_docket = None
1847 self._nodemap_docket = None
1841 self.index.clearcaches()
1848 self.index.clearcaches()
1842 # The python code is the one responsible for validating the docket, we
1849 # The python code is the one responsible for validating the docket, we
1843 # end up having to refresh it here.
1850 # end up having to refresh it here.
1844 use_nodemap = (
1851 use_nodemap = (
1845 not self._inline
1852 not self._inline
1846 and self._nodemap_file is not None
1853 and self._nodemap_file is not None
1847 and hasattr(self.index, 'update_nodemap_data')
1854 and hasattr(self.index, 'update_nodemap_data')
1848 )
1855 )
1849 if use_nodemap:
1856 if use_nodemap:
1850 nodemap_data = nodemaputil.persisted_data(self)
1857 nodemap_data = nodemaputil.persisted_data(self)
1851 if nodemap_data is not None:
1858 if nodemap_data is not None:
1852 self._nodemap_docket = nodemap_data[0]
1859 self._nodemap_docket = nodemap_data[0]
1853 self.index.update_nodemap_data(*nodemap_data)
1860 self.index.update_nodemap_data(*nodemap_data)
1854
1861
1855 def rev(self, node):
1862 def rev(self, node):
1856 """return the revision number associated with a <nodeid>"""
1863 """return the revision number associated with a <nodeid>"""
1857 try:
1864 try:
1858 return self.index.rev(node)
1865 return self.index.rev(node)
1859 except TypeError:
1866 except TypeError:
1860 raise
1867 raise
1861 except error.RevlogError:
1868 except error.RevlogError:
1862 # parsers.c radix tree lookup failed
1869 # parsers.c radix tree lookup failed
1863 if (
1870 if (
1864 node == self.nodeconstants.wdirid
1871 node == self.nodeconstants.wdirid
1865 or node in self.nodeconstants.wdirfilenodeids
1872 or node in self.nodeconstants.wdirfilenodeids
1866 ):
1873 ):
1867 raise error.WdirUnsupported
1874 raise error.WdirUnsupported
1868 raise error.LookupError(node, self.display_id, _(b'no node'))
1875 raise error.LookupError(node, self.display_id, _(b'no node'))
1869
1876
1870 # Accessors for index entries.
1877 # Accessors for index entries.
1871
1878
1872 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1879 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1873 # are flags.
1880 # are flags.
1874 def start(self, rev):
1881 def start(self, rev):
1875 return int(self.index[rev][0] >> 16)
1882 return int(self.index[rev][0] >> 16)
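
    # A minimal sketch (not part of the module) of the packing that start()
    # above and flags() below decode, assuming ``entry0 = self.index[rev][0]``:
    #
    #   offset = entry0 >> 16    # byte position of the chunk, as in start()
    #   flags = entry0 & 0xFFFF  # per-revision flags, as in flags()
    #   assert entry0 == (offset << 16) | flags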

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset. (instead of previous-offset +
        # previous-size)
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.feature_config.has_side_data:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank
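
    # Illustrative example of the rank definition above (hypothetical revs,
    # assuming a CHANGELOGV2 revlog that persists ranks): for the merge graph
    # 0 <- 1 <- 3 and 0 <- 2 <- 3, ancestors(3) == {0, 1, 2, 3}, so
    # fast_rank(3) would return 4; any other format version returns None.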

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base
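
    # Sketch of the walk above (illustrative index contents): index[rev][3]
    # records each revision's delta base, and a full snapshot is its own
    # base. With bases [0, 0, 1, 3] for revs 0..3, chainbase(2) follows
    # 2 -> 1 -> 0 and returns 0, while chainbase(3) returns 3 immediately.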

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        if self.feature_config.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if self.feature_config.canonical_parent_order and d[5] == self.nullid:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self.delta_config.general_delta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r
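
    # Continuing the chainbase() sketch (illustrative only): with bases
    # [0, 0, 1, 3] and general delta enabled, _chaininfo(2) counts two
    # deltas (revs 2 and 1) plus the base text at rev 0, returning
    # (2, e2[1] + e1[1] + e0[1]) where e[1] is each stored chunk length.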

    def _deltachain(self, rev, stoprev=None):
        return self._inner._deltachain(rev, stoprev=stoprev)

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied,
        uses nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def headrevsdiff(self, start, stop):
        try:
            return self.index.headrevsdiff(start, stop)
        except AttributeError:
            return dagop.headrevsdiff(self._uncheckedparentrevs, start, stop)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]
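
    # Sketch of the fallback above on a hypothetical graph 0 <- 1 <- 2 with
    # an extra branch 1 <- 3: every rev first marks itself as a potential
    # head, then clears its parents' marks, leaving ishead true only for
    # revs 2 and 3, so _headrevs() returns [2, 3].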

    def _head_node_ids(self):
        try:
            return self.index.head_node_ids()
        except AttributeError:
            return [self.node(r) for r in self.headrevs()]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return self._head_node_ids()
        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]
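
    # Hedged usage sketch (assuming `rl` is an open revlog instance):
    #
    #   rl.heads()                # all childless nodes; uses the index's
    #                             # head_node_ids() fast path when available
    #   rl.heads(start=some_node) # only heads descending from some_node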

    def diffheads(self, start, stop):
        """return the nodes that make up the difference between
        heads of revs before `start` and heads of revs before `stop`"""
        removed, added = self.headrevsdiff(start, stop)
        return [self.node(r) for r in removed], [self.node(r) for r in added]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c
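
    # children() scans every revision after rev(node), so it is linear in
    # the distance to tip rather than indexed. On the hypothetical graph
    # from the _headrevs() sketch (0 <- 1 <- 2, 1 <- 3), children(node(1))
    # returns [node(2), node(3)].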

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))
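
    # The early returns above settle the cheap cases without touching the
    # DAG (illustrative): nullrev is everyone's ancestor, a revision is its
    # own ancestor, and an ancestor's revision number can never exceed its
    # descendant's; only a < b requires the reachableroots() walk.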

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass
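
    # Hedged sketch of the identifier shapes _match() resolves (values are
    # illustrative):
    #
    #   _match(5)             -> node of rev 5
    #   _match(b'5')          -> same, via the str(rev) branch
    #   _match(b'-1')         -> node of the tip (negative revs count back)
    #   _match(<20-byte id>)  -> the node itself, if present in the index
    #   _match(<40-hex id>)   -> bin(id), if present; anything else falls
    #                            through and implicitly returns None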

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if not (id[-1] in hexdigits):
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)
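
    # Hedged example: for a node whose hex form begins b'a1b2c3...', and a
    # repository where no other node shares the b'a1b' prefix, shortest()
    # would return b'a1b' (or a longer prefix if minlength demands one, or
    # if the prefix were all 'f's and collided with the wdir pseudo-id).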

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self.delta_config.general_delta:
            return base
        else:
            return rev - 1
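
    # Sketch of the three cases above (illustrative): a full snapshot is its
    # own recorded base and reports nullrev; with general delta the recorded
    # base is the delta parent; in the legacy layout deltas always apply
    # against rev - 1, whatever the index records.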

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        ret = self._inner.issnapshot(rev)
        self.issnapshot = self._inner.issnapshot
        return ret

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._inner._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._inner._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2697
2704
2698 def revision(self, nodeorrev):
2705 def revision(self, nodeorrev):
2699 """return an uncompressed revision of a given node or revision
2706 """return an uncompressed revision of a given node or revision
2700 number.
2707 number.
2701 """
2708 """
2702 return self._revisiondata(nodeorrev)
2709 return self._revisiondata(nodeorrev)
2703
2710
2704 def sidedata(self, nodeorrev):
2711 def sidedata(self, nodeorrev):
2705 """a map of extra data related to the changeset but not part of the hash
2712 """a map of extra data related to the changeset but not part of the hash
2706
2713
2707 This function currently return a dictionary. However, more advanced
2714 This function currently return a dictionary. However, more advanced
2708 mapping object will likely be used in the future for a more
2715 mapping object will likely be used in the future for a more
2709 efficient/lazy code.
2716 efficient/lazy code.
2710 """
2717 """
2711 # deal with <nodeorrev> argument type
2718 # deal with <nodeorrev> argument type
2712 if isinstance(nodeorrev, int):
2719 if isinstance(nodeorrev, int):
2713 rev = nodeorrev
2720 rev = nodeorrev
2714 else:
2721 else:
2715 rev = self.rev(nodeorrev)
2722 rev = self.rev(nodeorrev)
2716 return self._sidedata(rev)
2723 return self._sidedata(rev)
2717
2724
2718 def _rawtext(self, node, rev):
2725 def _rawtext(self, node, rev):
2719 """return the possibly unvalidated rawtext for a revision
2726 """return the possibly unvalidated rawtext for a revision
2720
2727
2721 returns (rev, rawtext, validated)
2728 returns (rev, rawtext, validated)
2722 """
2729 """
2723 # Check if we have the entry in cache
2730 # Check if we have the entry in cache
2724 # The cache entry looks like (node, rev, rawtext)
2731 # The cache entry looks like (node, rev, rawtext)
2725 if self._inner._revisioncache:
2732 if self._inner._revisioncache:
2726 if self._inner._revisioncache[0] == node:
2733 if self._inner._revisioncache[0] == node:
2727 return (rev, self._inner._revisioncache[2], True)
2734 return (rev, self._inner._revisioncache[2], True)
2728
2735
2729 if rev is None:
2736 if rev is None:
2730 rev = self.rev(node)
2737 rev = self.rev(node)
2731
2738
2732 return self._inner.raw_text(node, rev)
2739 return self._inner.raw_text(node, rev)
2733
2740
2734 def _revisiondata(self, nodeorrev, raw=False):
2741 def _revisiondata(self, nodeorrev, raw=False):
2735 # deal with <nodeorrev> argument type
2742 # deal with <nodeorrev> argument type
2736 if isinstance(nodeorrev, int):
2743 if isinstance(nodeorrev, int):
2737 rev = nodeorrev
2744 rev = nodeorrev
2738 node = self.node(rev)
2745 node = self.node(rev)
2739 else:
2746 else:
2740 node = nodeorrev
2747 node = nodeorrev
2741 rev = None
2748 rev = None
2742
2749
2743 # fast path the special `nullid` rev
2750 # fast path the special `nullid` rev
2744 if node == self.nullid:
2751 if node == self.nullid:
2745 return b""
2752 return b""
2746
2753
2747 # ``rawtext`` is the text as stored inside the revlog. Might be the
2754 # ``rawtext`` is the text as stored inside the revlog. Might be the
2748 # revision or might need to be processed to retrieve the revision.
2755 # revision or might need to be processed to retrieve the revision.
2749 rev, rawtext, validated = self._rawtext(node, rev)
2756 rev, rawtext, validated = self._rawtext(node, rev)
2750
2757
2751 if raw and validated:
2758 if raw and validated:
2752 # if we don't want to process the raw text and that raw
2759 # if we don't want to process the raw text and that raw
2753 # text is cached, we can exit early.
2760 # text is cached, we can exit early.
2754 return rawtext
2761 return rawtext
2755 if rev is None:
2762 if rev is None:
2756 rev = self.rev(node)
2763 rev = self.rev(node)
2757 # the revlog's flag for this revision
2764 # the revlog's flag for this revision
2758 # (usually alter its state or content)
2765 # (usually alter its state or content)
2759 flags = self.flags(rev)
2766 flags = self.flags(rev)
2760
2767
2761 if validated and flags == REVIDX_DEFAULT_FLAGS:
2768 if validated and flags == REVIDX_DEFAULT_FLAGS:
2762 # no extra flags set, no flag processor runs, text = rawtext
2769 # no extra flags set, no flag processor runs, text = rawtext
2763 return rawtext
2770 return rawtext
2764
2771
2765 if raw:
2772 if raw:
2766 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2773 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2767 text = rawtext
2774 text = rawtext
2768 else:
2775 else:
2769 r = flagutil.processflagsread(self, rawtext, flags)
2776 r = flagutil.processflagsread(self, rawtext, flags)
2770 text, validatehash = r
2777 text, validatehash = r
2771 if validatehash:
2778 if validatehash:
2772 self.checkhash(text, node, rev=rev)
2779 self.checkhash(text, node, rev=rev)
2773 if not validated:
2780 if not validated:
2774 self._inner._revisioncache = (node, rev, rawtext)
2781 self._inner._revisioncache = (node, rev, rawtext)
2775
2782
2776 return text
2783 return text
2777
2784
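    # Note on the ``raw`` flag above: ``rawtext`` is the bytes as stored in
    # the revlog (possibly transformed at write time by flag processors),
    # while the non-raw result is the fully processed text. When no special
    # flags are set the two are identical, which is what the early-return
    # fast paths rely on.
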
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        sidedata_end = None
        if self._docket is not None:
            sidedata_end = self._docket.sidedata_end
        return self._inner.sidedata(rev, sidedata_end)

    def rawdata(self, nodeorrev):
        """return the uncompressed raw data of a given node or revision number."""
        return self._revisiondata(nodeorrev, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

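    # Sketch of what the default implementation hashes (illustrative only):
    # SHA-1 over the two parent nodes in sorted order, followed by the text.
    #
    #     import hashlib
    #     def sha1node(text, p1, p2):
    #         a, b = sorted([p1, p2])
    #         return hashlib.sha1(a + b + text).digest()
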
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if (
                    self._inner._revisioncache
                    and self._inner._revisioncache[0] == node
                ):
                    self._inner._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self.feature_config.censorable and storageutil.iscensoredtext(
                text
            ):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    @property
    def _split_index_file(self):
        """the path where to expect the index of an ongoing splitting operation

        The file will only exist if a splitting operation is in progress, but
        it is always expected at the same location."""
        parts = self.radix.split(b'/')
        if len(parts) > 1:
            # appends a '-s' suffix to the ``data/`` or ``meta/`` base
            head = parts[0] + b'-s'
            mids = parts[1:-1]
            tail = parts[-1] + b'.i'
            pieces = [head] + mids + [tail]
            return b'/'.join(pieces)
        else:
            # the revlog is stored at the root of the store (changelog or
            # manifest), no risk of collision.
            return self.radix + b'.i.s'

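    # Example of the mapping above (illustrative values): a filelog with
    # radix ``data/foo`` expects its in-progress split index at
    # ``data-s/foo.i``, while a root-level revlog with radix ``00changelog``
    # uses ``00changelog.i.s``.
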
    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or (self._may_inline and total_size < _maxinline):
            return

        if self._docket is not None:
            msg = b"inline revlog should not have a docket"
            raise error.ProgrammingError(msg)

        # In the common case, we enforce inline size because the revlog has
        # been appended to. And in such a case, it must have an initial offset
        # recorded in the transaction.
        troffset = tr.findoffset(self._inner.canonical_index_file)
        pre_touched = troffset is not None
        if not pre_touched and self.target[0] != KIND_CHANGELOG:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )

        tr.addbackup(self._inner.canonical_index_file, for_offset=pre_touched)
        tr.add(self._datafile, 0)

        new_index_file_path = None
        old_index_file_path = self._indexfile
        new_index_file_path = self._split_index_file
        opener = self.opener
        weak_self = weakref.ref(self)

        # the "split" index replaces the real index when the transaction is
        # finalized
        def finalize_callback(tr):
            opener.rename(
                new_index_file_path,
                old_index_file_path,
                checkambig=True,
            )
            maybe_self = weak_self()
            if maybe_self is not None:
                maybe_self._indexfile = old_index_file_path
                maybe_self._inner.index_file = maybe_self._indexfile

        def abort_callback(tr):
            maybe_self = weak_self()
            if maybe_self is not None:
                maybe_self._indexfile = old_index_file_path
                maybe_self._inner.inline = True
                maybe_self._inner.index_file = old_index_file_path

        tr.registertmp(new_index_file_path)
        # we use 001 here to make this happen after the finalisation of any
        # pending changelog write (using 000). Otherwise the two finalizers
        # would step over each other and delete the changelog.i file.
        if self.target[1] is not None:
            callback_id = b'001-revlog-split-%d-%s' % self.target
        else:
            callback_id = b'001-revlog-split-%d' % self.target[0]
        tr.addfinalize(callback_id, finalize_callback)
        tr.addabort(callback_id, abort_callback)

        self._format_flags &= ~FLAG_INLINE_DATA
        self._inner.split_inline(
            tr,
            self._format_flags | self._format_version,
            new_index_file_path=new_index_file_path,
        )

        self._inline = False
        if new_index_file_path is not None:
            self._indexfile = new_index_file_path

        nodemaputil.setup_persistent_nodemap(tr, self)

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        with self._inner.reading():
            yield

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._inner.is_writing:
            yield
        else:
            data_end = None
            sidedata_end = None
            if self._docket is not None:
                data_end = self._docket.data_end
                sidedata_end = self._docket.sidedata_end
            with self._inner.writing(
                transaction,
                data_end=data_end,
                sidedata_end=sidedata_end,
            ):
                yield
            if self._docket is not None:
                self._write_docket(transaction)

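    # Usage sketch (hypothetical ``rl``/``tr`` names): every mutation of the
    # revlog must happen inside this context so that file handles, delayed
    # writes and docket offsets stay consistent:
    #
    #     with rl._writing(tr):
    #         ...  # one or more _addrevision()/_writeentry() calls
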
    @property
    def is_delaying(self):
        return self._inner.is_delaying

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic.

        We could also imagine using the same transaction logic for all
        revlogs since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.feature_config.has_side_data:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

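    # Usage sketch (hypothetical names, inside an open transaction ``tr``):
    #
    #     rev = rl.addrevision(b'file content', tr, linkrev, p1node, p2node)
    #     node = rl.node(rev)
    #
    # The returned value is the revision number; it may be an existing one
    # when the computed node is already stored.
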
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data: bytes) -> Tuple[bytes, bytes]:
        return self._inner.compress(data)

    def decompress(self, data):
        return self._inner.decompress(data)

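    # Convention note (inferred from the call sites in this module):
    # ``compress`` returns a ``(header, data)`` pair, where the header is
    # ``b'u'`` when the data had to be stored uncompressed and empty when the
    # compression engine's own magic (e.g. zlib's ``b'x'``) identifies the
    # payload.
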
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._inner._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        # wrap rawtext in a mutable container so the deltacomputer can lazily
        # fill in the full text (read back via ``btext[0]`` below)
        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._inner._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need the rawtext size, before it is changed by flag processors,
            # which is the non-raw size. use revlog explicitly to avoid
            # filelog's extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self.delta_config.debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        if cachedelta is not None and len(cachedelta) == 2:
            # If the cached delta has no information about how it should be
            # reused, add the default reuse instruction according to the
            # revlog's configuration.
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse = DELTA_BASE_REUSE_NO
            cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.feature_config.has_side_data:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self._inner.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no
            # different from ones we manually add.
            sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self.feature_config.compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._inner._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

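    # Note on ``cachedelta``: callers may pass either a ``(baserev, delta)``
    # pair or a ``(baserev, delta, reuse_policy)`` triple. The code above
    # normalizes the pair form by appending a policy (DELTA_BASE_REUSE_TRY or
    # DELTA_BASE_REUSE_NO) derived from the delta configuration.
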
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self,
        transaction,
        entry,
        data,
        link,
        offset,
        sidedata,
        sidedata_offset,
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        index_end = data_end = sidedata_end = None
        if self._docket is not None:
            index_end = self._docket.index_end
            data_end = self._docket.data_end
            sidedata_end = self._docket.sidedata_end

        files_end = self._inner.write_entry(
            transaction,
            entry,
            data,
            link,
            offset,
            sidedata,
            sidedata_offset,
            index_end,
            data_end,
            sidedata_end,
        )
        self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = files_end[0]
            self._docket.data_end = files_end[1]
            self._docket.sidedata_end = files_end[2]

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log; the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the revision that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self.delta_config.debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

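    # Shape of each item consumed from ``deltas`` above, as destructured in
    # the loop:
    #
    #     (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    #
    # ``delta`` is a binary patch against ``deltabase``, which must already
    # be known to this revlog (possibly because it appeared earlier in the
    # same group).
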
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self.feature_config.censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self.feature_config.censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

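    # Because linkrevs are not necessarily monotonic with revision numbers,
    # truncating at the strip point can also remove revisions whose linkrev
    # is still below ``minlink``; those are the "broken" revs reported in the
    # returned set, and callers are expected to save and re-add them (see
    # ``strip`` below).
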
3514 def strip(self, minlink, transaction):
3521 def strip(self, minlink, transaction):
3515 """truncate the revlog on the first revision with a linkrev >= minlink
3522 """truncate the revlog on the first revision with a linkrev >= minlink
3516
3523
3517 This function is called when we're stripping revision minlink and
3524 This function is called when we're stripping revision minlink and
3518 its descendants from the repository.
3525 its descendants from the repository.
3519
3526
3520 We have to remove all revisions with linkrev >= minlink, because
3527 We have to remove all revisions with linkrev >= minlink, because
3521 the equivalent changelog revisions will be renumbered after the
3528 the equivalent changelog revisions will be renumbered after the
3522 strip.
3529 strip.
3523
3530
3524 So we truncate the revlog on the first of these revisions, and
3531 So we truncate the revlog on the first of these revisions, and
3525 trust that the caller has saved the revisions that shouldn't be
3532 trust that the caller has saved the revisions that shouldn't be
3526 removed and that it'll re-add them after this truncation.
3533 removed and that it'll re-add them after this truncation.
3527 """
3534 """
3528 if len(self) == 0:
3535 if len(self) == 0:
3529 return
3536 return
3530
3537
3531 rev, _ = self.getstrippoint(minlink)
3538 rev, _ = self.getstrippoint(minlink)
3532 if rev == len(self):
3539 if rev == len(self):
3533 return
3540 return
3534
3541
3535 # first truncate the files on disk
3542 # first truncate the files on disk
3536 data_end = self.start(rev)
3543 data_end = self.start(rev)
3537 if not self._inline:
3544 if not self._inline:
3538 transaction.add(self._datafile, data_end)
3545 transaction.add(self._datafile, data_end)
3539 end = rev * self.index.entry_size
3546 end = rev * self.index.entry_size
3540 else:
3547 else:
3541 end = data_end + (rev * self.index.entry_size)
3548 end = data_end + (rev * self.index.entry_size)
3542
3549
3543 if self._sidedatafile:
3550 if self._sidedatafile:
3544 sidedata_end = self.sidedata_cut_off(rev)
3551 sidedata_end = self.sidedata_cut_off(rev)
3545 transaction.add(self._sidedatafile, sidedata_end)
3552 transaction.add(self._sidedatafile, sidedata_end)
3546
3553
3547 transaction.add(self._indexfile, end)
3554 transaction.add(self._indexfile, end)
3548 if self._docket is not None:
3555 if self._docket is not None:
3549 # XXX we could, leverage the docket while stripping. However it is
3556 # XXX we could, leverage the docket while stripping. However it is
3550 # not powerfull enough at the time of this comment
3557 # not powerfull enough at the time of this comment
3551 self._docket.index_end = end
3558 self._docket.index_end = end
3552 self._docket.data_end = data_end
3559 self._docket.data_end = data_end
3553 self._docket.sidedata_end = sidedata_end
3560 self._docket.sidedata_end = sidedata_end
3554 self._docket.write(transaction, stripping=True)
3561 self._docket.write(transaction, stripping=True)
3555
3562
3556 # then reset internal state in memory to forget those revisions
3563 # then reset internal state in memory to forget those revisions
3557 self._chaininfocache = util.lrucachedict(500)
3564 self._chaininfocache = util.lrucachedict(500)
3558 self._inner.clear_cache()
3565 self._inner.clear_cache()
3559
3566
3560 del self.index[rev:-1]
3567 del self.index[rev:-1]
3561
3568
3562 def checksize(self):
3569 def checksize(self):
3563 """Check size of index and data files
3570 """Check size of index and data files
3564
3571
3565 return a (dd, di) tuple.
3572 return a (dd, di) tuple.
3566 - dd: extra bytes for the "data" file
3573 - dd: extra bytes for the "data" file
3567 - di: extra bytes for the "index" file
3574 - di: extra bytes for the "index" file
3568
3575
3569 A healthy revlog will return (0, 0).
3576 A healthy revlog will return (0, 0).
3570 """
3577 """
3571 expected = 0
3578 expected = 0
3572 if len(self):
3579 if len(self):
3573 expected = max(0, self.end(len(self) - 1))
3580 expected = max(0, self.end(len(self) - 1))
3574
3581
3575 try:
3582 try:
3576 with self._datafp() as f:
3583 with self._datafp() as f:
3577 f.seek(0, io.SEEK_END)
3584 f.seek(0, io.SEEK_END)
3578 actual = f.tell()
3585 actual = f.tell()
3579 dd = actual - expected
3586 dd = actual - expected
3580 except FileNotFoundError:
3587 except FileNotFoundError:
3581 dd = 0
3588 dd = 0
3582
3589
3583 try:
3590 try:
3584 f = self.opener(self._indexfile)
3591 f = self.opener(self._indexfile)
3585 f.seek(0, io.SEEK_END)
3592 f.seek(0, io.SEEK_END)
3586 actual = f.tell()
3593 actual = f.tell()
3587 f.close()
3594 f.close()
3588 s = self.index.entry_size
3595 s = self.index.entry_size
3589 i = max(0, actual // s)
3596 i = max(0, actual // s)
3590 di = actual - (i * s)
3597 di = actual - (i * s)
3591 if self._inline:
3598 if self._inline:
3592 databytes = 0
3599 databytes = 0
3593 for r in self:
3600 for r in self:
3594 databytes += max(0, self.length(r))
3601 databytes += max(0, self.length(r))
3595 dd = 0
3602 dd = 0
3596 di = actual - len(self) * s - databytes
3603 di = actual - len(self) * s - databytes
3597 except FileNotFoundError:
3604 except FileNotFoundError:
3598 di = 0
3605 di = 0
3599
3606
3600 return (dd, di)
3607 return (dd, di)
3601
3608
    def files(self):
        """return list of files that compose this revlog"""
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self.delta_config.general_delta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self._candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

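    # Illustrative sketch, not part of the module: consuming emitted
    # revision deltas. ``rl`` is an open revlog; ``handle_fulltext`` and
    # ``handle_delta`` are hypothetical callbacks, and the attribute names
    # assume the ``revlogrevisiondelta`` interface.
    #
    #     for rd in rl.emitrevisions(nodes, revisiondata=True):
    #         if rd.revision is not None:
    #             handle_fulltext(rd.node, rd.revision)
    #         else:
    #             handle_delta(rd.node, rd.basenode, rd.delta)
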
    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta
        encoding differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy
        can significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means
        if you are converting a non-generaldelta revlog to a generaldelta
        revlog, deltas will be recomputed if the delta's parent isn't a
        parent of the revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether deltas are forcibly computed against both
        parents for merges. When unset, the destination revlog's existing
        configuration is used.

        See `revlogutils.sidedata.get_sidedata_helpers` for documentation on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazy_delta and lazy_delta_base control whether to reuse a cached
        # delta, if possible.
        old_delta_config = destrevlog.delta_config
        destrevlog.delta_config = destrevlog.delta_config.copy()

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog.delta_config.lazy_delta_base = True
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = False

            delta_both_parents = (
                forcedeltabothparents or old_delta_config.delta_both_parents
            )
            destrevlog.delta_config.delta_both_parents = delta_both_parents

            with self.reading(), destrevlog._writing(tr):
                self._clone(
                    tr,
                    destrevlog,
                    addrevisioncb,
                    deltareuse,
                    forcedeltabothparents,
                    sidedata_helpers,
                )

        finally:
            destrevlog.delta_config = old_delta_config

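    # Illustrative sketch, not part of the module: forcing a full delta
    # recomputation while copying a revlog. ``src`` and ``dst`` are assumed
    # open revlog instances and ``repo`` a hypothetical repository object
    # supplying the transaction.
    #
    #     with repo.transaction(b'clone-revlog') as tr:
    #         src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
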
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self.delta_config.debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog.delta_config.lazy_delta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._inner._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    try:
                        rawtext = self._revisiondata(rev)
                    except error.CensoredNodeError as censored:
                        assert flags & REVIDX_ISCENSORED
                        rawtext = censored.tombstone
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censor_nodes, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censor_nodes, tombstone)
        else:
            rewrite.v2_censor(self, tr, censor_nodes, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            # common: the most common case
            # rename: with a rename
            # meta: file content starts with b'\1\n', the metadata
            #       header defined in filelog.py, but without a rename
            # ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text" mentioned below
            # is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            # 1. length check: L1 == L2, in all cases.
            # 2. hash check: depending on flag processor, we may need to
            #    use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

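    # Illustrative sketch, not part of the module: draining the problem
    # generator for a revlog ``rl``. The ``state`` mapping shown is a
    # minimal assumption; real callers populate additional keys.
    #
    #     state = {b'expectedversion': 1, b'erroroncensored': True}
    #     for problem in rl.verifyintegrity(state):
    #         print(problem.error or problem.warning)
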
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

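    # Illustrative sketch, not part of the module: only the metrics that
    # are requested get computed, so unrequested keys cost nothing.
    #
    #     info = rl.storageinfo(revisionscount=True, storedsize=True)
    #     print(info[b'revisionscount'], info[b'storedsize'])
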
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.feature_config.has_side_data:
            return
        # revlog formats with sidedata support do not support inline data
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._inner._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.feature_config.has_side_data:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self._inner.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)