revlog: minor refactor in the chunk gather process...
marmoute
r52000:c2d2e5b6 default
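
This changeset reworks how ``_chunks`` gathers decompressed chunks: rather
than appending bare chunks to a list, the loop now records ``(rev, chunk)``
pairs and strips the pairing on return, keeping the owning revision
available while the chunks are being gathered. The shape of the new loop is
roughly the following (a minimal sketch with hypothetical names, not code
from the commit itself):

    def gather(revs, read_chunk):
        # collect (rev, chunk) pairs so each chunk stays associated with
        # the revision it belongs to while gathering
        chunks = []
        ladd = chunks.append  # alias to avoid attribute lookup in the loop
        for rev in revs:
            ladd((rev, read_chunk(rev)))
        # callers still receive the plain chunks, in input order
        return [x[1] for x in chunks]

In the diff below, lines removed by the commit are prefixed with ``-`` and
added lines with ``+``; unprefixed lines are unchanged context.
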
@@ -1,4170 +1,4174 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import io
import os
import struct
import weakref
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if hasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'


class _Config:
    def copy(self):
        return self.__class__(**self.__dict__)


@attr.s()
class FeatureConfig(_Config):
    """Hold configuration values about the available revlog features"""

    # the default compression engine
    compression_engine = attr.ib(default=b'zlib')
    # compression engines options
    compression_engine_options = attr.ib(default=attr.Factory(dict))

    # can we use censor on this revlog
    censorable = attr.ib(default=False)
    # does this revlog use the "side data" feature
    has_side_data = attr.ib(default=False)
    # might remove rank configuration once the computation has no impact
    compute_rank = attr.ib(default=False)
    # parent order is supposed to be semantically irrelevant, so we
    # normally resort parents to ensure that the first parent is non-null,
    # if there is a non-null parent at all.
    # filelog abuses the parent order as flag to mark some instances of
    # meta-encoded files, so allow it to disable this behavior.
    canonical_parent_order = attr.ib(default=False)
    # can ellipsis commit be used
    enable_ellipsis = attr.ib(default=False)

    def copy(self):
        new = super().copy()
        new.compression_engine_options = self.compression_engine_options.copy()
        return new


@attr.s()
class DataConfig(_Config):
281 """Hold configuration value about how the revlog data are read"""
281 """Hold configuration value about how the revlog data are read"""

    # should we try to open the "pending" version of the revlog
    try_pending = attr.ib(default=False)
    # should we try to open the "split" version of the revlog
    try_split = attr.ib(default=False)
    # When True, indexfile should be opened with checkambig=True at writing,
    # to avoid file stat ambiguity.
    check_ambig = attr.ib(default=False)

    # If true, use mmap instead of reading to deal with large index
    mmap_large_index = attr.ib(default=False)
    # how much data is large
    mmap_index_threshold = attr.ib(default=None)
    # How much data to read and cache into the raw revlog data cache.
    chunk_cache_size = attr.ib(default=65536)

    # Allow sparse reading of the revlog data
    with_sparse_read = attr.ib(default=False)
    # minimal density of a sparse read chunk
    sr_density_threshold = attr.ib(default=0.50)
    # minimal size of data we skip when performing sparse read
    sr_min_gap_size = attr.ib(default=262144)

    # are deltas encoded against arbitrary bases.
    generaldelta = attr.ib(default=False)


@attr.s()
class DeltaConfig(_Config):
311 """Hold configuration value about how new delta are computed
311 """Hold configuration value about how new delta are computed
312
312
313 Some attributes are duplicated from DataConfig to help havign each object
313 Some attributes are duplicated from DataConfig to help havign each object
314 self contained.
314 self contained.
315 """
315 """

    # can deltas be encoded against arbitrary bases.
    general_delta = attr.ib(default=False)
    # Allow sparse writing of the revlog data
    sparse_revlog = attr.ib(default=False)
    # maximum length of a delta chain
    max_chain_len = attr.ib(default=None)
    # Maximum distance between delta chain base start and end
    max_deltachain_span = attr.ib(default=-1)
    # If `upper_bound_comp` is not None, this is the expected maximal gain from
    # compression for the data content.
    upper_bound_comp = attr.ib(default=None)
    # Should we try a delta against both parents
    delta_both_parents = attr.ib(default=True)
    # Test delta base candidate groups by chunk of this maximal size.
    candidate_group_chunk_size = attr.ib(default=0)
    # Should we display debug information about delta computation
    debug_delta = attr.ib(default=False)
    # trust incoming deltas by default
    lazy_delta = attr.ib(default=True)
    # trust the base of incoming deltas by default
    lazy_delta_base = attr.ib(default=False)


class _InnerRevlog:
    """An inner layer of the revlog object

    That layer exists to be able to delegate some operations to Rust; its
    boundaries are arbitrary and based on what we can delegate to Rust.
    """

    def __init__(
        self,
        opener,
        index,
        index_file,
        data_file,
        sidedata_file,
        inline,
        data_config,
        delta_config,
        feature_config,
        chunk_cache,
        default_compression_header,
    ):
        self.opener = opener
        self.index = index

        self.__index_file = index_file
        self.data_file = data_file
        self.sidedata_file = sidedata_file
        self.inline = inline
        self.data_config = data_config
        self.delta_config = delta_config
        self.feature_config = feature_config

        # used during diverted write.
        self._orig_index_file = None

        self._default_compression_header = default_compression_header

        # index

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self.index_file if self.inline else self.data_file),
            self.data_config.chunk_cache_size,
            chunk_cache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self.sidedata_file,
            self.data_config.chunk_cache_size,
        )

        # revlog header -> revlog compressor
        self._decompressors = {}
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None

        self._delay_buffer = None

    @property
    def index_file(self):
        return self.__index_file

    @index_file.setter
    def index_file(self, new_index_file):
        self.__index_file = new_index_file
        if self.inline:
            self._segmentfile.filename = new_index_file

    def __len__(self):
        return len(self.index)

    def clear_cache(self):
        assert not self.is_delaying
        self._revisioncache = None
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

    @property
    def canonical_index_file(self):
        if self._orig_index_file is not None:
            return self._orig_index_file
        return self.index_file

    @property
    def is_delaying(self):
428 """is the revlog is currently delaying the visibility of written data?
428 """is the revlog is currently delaying the visibility of written data?
429
429
430 The delaying mechanism can be either in-memory or written on disk in a
430 The delaying mechanism can be either in-memory or written on disk in a
431 side-file."""
431 side-file."""
432 return (self._delay_buffer is not None) or (
432 return (self._delay_buffer is not None) or (
433 self._orig_index_file is not None
433 self._orig_index_file is not None
434 )
434 )

    # Derived from index values.

    def start(self, rev):
        """the offset of the data chunk for this revision"""
        return int(self.index[rev][0] >> 16)

    def length(self, rev):
        """the length of the data chunk for this revision"""
        return self.index[rev][1]

    def end(self, rev):
        """the end of the data chunk for this revision"""
        return self.start(rev) + self.length(rev)

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
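        # entry[3] is the delta base; a base equal to the revision itself
        # marks a full snapshot (no delta parent).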
        if base == rev:
            return nullrev
        elif self.delta_config.general_delta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self.delta_config.sparse_revlog:
            return self.deltaparent(rev) == nullrev
        elif hasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        while self.length(p1) == 0:
            b = self.deltaparent(p1)
            if b == p1:
                break
            p1 = b
        p2 = entry[6]
        while self.length(p2) == 0:
            b = self.deltaparent(p2)
            if b == p2:
                break
            p2 = b
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        generaldelta = self.delta_config.general_delta
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self.feature_config.compression_engine]
        return engine.revlogcompressor(
            self.feature_config.compression_engine_options
        )

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._default_compression_header is None:
            return None
        t = self._default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(
                    self.feature_config.compression_engine_options
                )
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

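        # The engine produced no gain, so the data is stored uncompressed.
        # A chunk that already starts with NUL needs no marker (NUL is not
        # a valid compression header); anything else gets the b'u'
        # ("untouched") prefix so decompress() can tell it apart.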
        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        if len(self.index) == 0:
            yield  # nothing to be read
        else:
            with self._segmentfile.reading():
                with self._segmentfile_sidedata.reading():
                    yield

    @property
    def is_writing(self):
642 """True is a writing context is open"""
642 """True is a writing context is open"""
        return self._writinghandles is not None

    @property
    def is_open(self):
        """True if any file handle is being held

        Used for assert and debug in the python code"""
        return self._segmentfile.is_open or self._segmentfile_sidedata.is_open

    @contextlib.contextmanager
    def writing(self, transaction, data_end=None, sidedata_end=None):
        """Open the revlog files for writing

        Adding content to a revlog should be done within such a context.
        """
        if self.is_writing:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self.index)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self.inline:
                    try:
                        dfh = self.opener(self.data_file, mode=b"r+")
                        if data_end is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self.opener(self.data_file, mode=b"w+")
                    transaction.add(self.data_file, dsize)
                if self.sidedata_file is not None:
                    assert sidedata_end is not None
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self.sidedata_file, mode=b"r+")
                        dfh.seek(sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self.sidedata_file, mode=b"w+")
                    transaction.add(self.sidedata_file, sidedata_end)

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self.inline:
                    transaction.add(self.index_file, dsize + isize)
                else:
                    transaction.add(self.index_file, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self.inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing referent to
                # potential unflushed data content.
                if ifh is not None:
                    ifh.close()

    def __index_write_fp(self, index_end=None):
        """internal method to open the index file for writing

        You should not use this directly and use `_writing` instead
        """
        try:
            if self._delay_buffer is None:
                f = self.opener(
                    self.index_file,
                    mode=b"r+",
                    checkambig=self.data_config.check_ambig,
                )
            else:
                # check_ambig affects the way we open files for writing;
                # however, here we do not actually open a file, since writes
                # will be appended to a delay_buffer. So check_ambig is not
                # meaningful and is unneeded here.
                f = randomaccessfile.appender(
                    self.opener, self.index_file, b"r+", self._delay_buffer
                )
            if index_end is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(index_end, os.SEEK_SET)
            return f
        except FileNotFoundError:
            if self._delay_buffer is None:
                return self.opener(
                    self.index_file,
                    mode=b"w+",
                    checkambig=self.data_config.check_ambig,
                )
            else:
                return randomaccessfile.appender(
                    self.opener, self.index_file, b"w+", self._delay_buffer
                )

    def __index_new_fp(self):
        """internal method to create a new index file for writing

        You should not use this unless you are upgrading from inline revlog
        """
        return self.opener(
            self.index_file,
            mode=b"w",
            checkambig=self.data_config.check_ambig,
            atomictemp=True,
        )

    def split_inline(self, tr, header, new_index_file_path=None):
        """split the data of an inline revlog into an index and a data file"""
        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with sidedata writing handle as it is only
            # relevant with revlog-v2 which is never inline, not reaching
            # this code

        new_dfh = self.opener(self.data_file, mode=b"w+")
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self.reading():
                for r in range(len(self.index)):
                    new_dfh.write(self.get_segment_for_revs(r, r)[1])
                new_dfh.flush()

            if new_index_file_path is not None:
                self.index_file = new_index_file_path
            with self.__index_new_fp() as fp:
                self.inline = False
                for i in range(len(self.index)):
                    e = self.index.entry_binary(i)
                    if i == 0:
                        packed_header = self.index.pack_header(header)
                        e = packed_header + e
                    fp.write(e)

                # If we don't use side-write, the temp file replaces the
                # real index when we exit the context manager

            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self.data_file,
                self.data_config.chunk_cache_size,
            )

            if existing_handles:
                # switched from inline to conventional: reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with sidedata writing handle as it is only
                # relevant with revlog-v2 which is never inline, not reaching
                # this code
        finally:
            if new_dfh is not None:
                new_dfh.close()
        return self.index_file

    def get_segment_for_revs(self, startrev, endrev):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.

        API: we should consider making this a private part of the InnerRevlog
        at some point.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

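        # an inline revlog interleaves index entries with data chunks, so
        # skip the index entries stored before the requested segment.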
        if self.inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length)

    def _chunk(self, rev):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self.get_segment_for_revs(rev, rev)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self.inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append
+        chunks = []
+        ladd = chunks.append

        if not self.data_config.with_sparse_read:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self,
                revs,
                targetsize=targetsize,
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self.get_segment_for_revs(firstrev, lastrev)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
-                return [self._chunk(rev) for rev in revschunk]
+                for rev in revschunk:
+                    ladd((rev, self._chunk(rev)))

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
-                    ladd(c)
+                    c = c
                elif comp_mode == COMP_MODE_INLINE:
-                    ladd(decomp(c))
+                    c = decomp(c)
                elif comp_mode == COMP_MODE_DEFAULT:
-                    ladd(def_decomp(c))
+                    c = def_decomp(c)
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)
+                ladd((rev, c))

-        return l
+        return [x[1] for x in chunks]

    def raw_text(self, node, rev):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            cachedrev = self._revisioncache[1]

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._inner._revisioncache after validating the text
        self._revisioncache = None

975 targetsize = None
979 targetsize = None
976 rawsize = self.index[rev][2]
980 rawsize = self.index[rev][2]
977 if 0 <= rawsize:
981 if 0 <= rawsize:
978 targetsize = 4 * rawsize
982 targetsize = 4 * rawsize
979
983
980 bins = self._chunks(chain, targetsize=targetsize)
984 bins = self._chunks(chain, targetsize=targetsize)
981 if basetext is None:
985 if basetext is None:
982 basetext = bytes(bins[0])
986 basetext = bytes(bins[0])
983 bins = bins[1:]
987 bins = bins[1:]
984
988
985 rawtext = mdiff.patches(basetext, bins)
989 rawtext = mdiff.patches(basetext, bins)
986 del basetext # let us have a chance to free memory early
990 del basetext # let us have a chance to free memory early
987 return (rev, rawtext, False)
991 return (rev, rawtext, False)
988
992
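
For orientation, here is a minimal sketch of the delta-chain resolution that raw_text performs, assuming `chain` lists the revisions of a delta chain whose first entry is stored as a full snapshot:

    # Illustrative sketch, not part of the commit.
    bins = self._chunks(chain)    # one decompressed chunk per rev in chain
    basetext = bytes(bins[0])     # full text of the chain base
    # every remaining chunk is a binary delta; apply them oldest to newest
    rawtext = mdiff.patches(basetext, bins[1:])
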
989 def sidedata(self, rev, sidedata_end):
993 def sidedata(self, rev, sidedata_end):
990 """Return the sidedata for a given revision number."""
994 """Return the sidedata for a given revision number."""
991 index_entry = self.index[rev]
995 index_entry = self.index[rev]
992 sidedata_offset = index_entry[8]
996 sidedata_offset = index_entry[8]
993 sidedata_size = index_entry[9]
997 sidedata_size = index_entry[9]
994
998
995 if self.inline:
999 if self.inline:
996 sidedata_offset += self.index.entry_size * (1 + rev)
1000 sidedata_offset += self.index.entry_size * (1 + rev)
997 if sidedata_size == 0:
1001 if sidedata_size == 0:
998 return {}
1002 return {}
999
1003
1000 if sidedata_end < sidedata_offset + sidedata_size:
1004 if sidedata_end < sidedata_offset + sidedata_size:
1001 filename = self.sidedata_file
1005 filename = self.sidedata_file
1002 end = sidedata_end
1006 end = sidedata_end
1003 offset = sidedata_offset
1007 offset = sidedata_offset
1004 length = sidedata_size
1008 length = sidedata_size
1005 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1009 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1006 raise error.RevlogError(m)
1010 raise error.RevlogError(m)
1007
1011
1008 comp_segment = self._segmentfile_sidedata.read_chunk(
1012 comp_segment = self._segmentfile_sidedata.read_chunk(
1009 sidedata_offset, sidedata_size
1013 sidedata_offset, sidedata_size
1010 )
1014 )
1011
1015
1012 comp = self.index[rev][11]
1016 comp = self.index[rev][11]
1013 if comp == COMP_MODE_PLAIN:
1017 if comp == COMP_MODE_PLAIN:
1014 segment = comp_segment
1018 segment = comp_segment
1015 elif comp == COMP_MODE_DEFAULT:
1019 elif comp == COMP_MODE_DEFAULT:
1016 segment = self._decompressor(comp_segment)
1020 segment = self._decompressor(comp_segment)
1017 elif comp == COMP_MODE_INLINE:
1021 elif comp == COMP_MODE_INLINE:
1018 segment = self.decompress(comp_segment)
1022 segment = self.decompress(comp_segment)
1019 else:
1023 else:
1020 msg = b'unknown compression mode %d'
1024 msg = b'unknown compression mode %d'
1021 msg %= comp
1025 msg %= comp
1022 raise error.RevlogError(msg)
1026 raise error.RevlogError(msg)
1023
1027
1024 sidedata = sidedatautil.deserialize_sidedata(segment)
1028 sidedata = sidedatautil.deserialize_sidedata(segment)
1025 return sidedata
1029 return sidedata
1026
1030
1027 def write_entry(
1031 def write_entry(
1028 self,
1032 self,
1029 transaction,
1033 transaction,
1030 entry,
1034 entry,
1031 data,
1035 data,
1032 link,
1036 link,
1033 offset,
1037 offset,
1034 sidedata,
1038 sidedata,
1035 sidedata_offset,
1039 sidedata_offset,
1036 index_end,
1040 index_end,
1037 data_end,
1041 data_end,
1038 sidedata_end,
1042 sidedata_end,
1039 ):
1043 ):
1040 # Files opened in a+ mode have inconsistent behavior on various
1044 # Files opened in a+ mode have inconsistent behavior on various
1041 # platforms. Windows requires that a file positioning call be made
1045 # platforms. Windows requires that a file positioning call be made
1042 # when the file handle transitions between reads and writes. See
1046 # when the file handle transitions between reads and writes. See
1043 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1047 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1044 # platforms, Python or the platform itself can be buggy. Some versions
1048 # platforms, Python or the platform itself can be buggy. Some versions
1045 # of Solaris have been observed to not append at the end of the file
1049 # of Solaris have been observed to not append at the end of the file
1046 # if the file was seeked to before the end. See issue4943 for more.
1050 # if the file was seeked to before the end. See issue4943 for more.
1047 #
1051 #
1048 # We work around this issue by inserting a seek() before writing.
1052 # We work around this issue by inserting a seek() before writing.
1049 # Note: This is likely not necessary on Python 3. However, because
1053 # Note: This is likely not necessary on Python 3. However, because
1050 # the file handle is reused for reads and may be seeked there, we need
1054 # the file handle is reused for reads and may be seeked there, we need
1051 # to be careful before changing this.
1055 # to be careful before changing this.
1052 if self._writinghandles is None:
1056 if self._writinghandles is None:
1053 msg = b'adding revision outside `revlog._writing` context'
1057 msg = b'adding revision outside `revlog._writing` context'
1054 raise error.ProgrammingError(msg)
1058 raise error.ProgrammingError(msg)
1055 ifh, dfh, sdfh = self._writinghandles
1059 ifh, dfh, sdfh = self._writinghandles
1056 if index_end is None:
1060 if index_end is None:
1057 ifh.seek(0, os.SEEK_END)
1061 ifh.seek(0, os.SEEK_END)
1058 else:
1062 else:
1059 ifh.seek(index_end, os.SEEK_SET)
1063 ifh.seek(index_end, os.SEEK_SET)
1060 if dfh:
1064 if dfh:
1061 if data_end is None:
1065 if data_end is None:
1062 dfh.seek(0, os.SEEK_END)
1066 dfh.seek(0, os.SEEK_END)
1063 else:
1067 else:
1064 dfh.seek(data_end, os.SEEK_SET)
1068 dfh.seek(data_end, os.SEEK_SET)
1065 if sdfh:
1069 if sdfh:
1066 sdfh.seek(sidedata_end, os.SEEK_SET)
1070 sdfh.seek(sidedata_end, os.SEEK_SET)
1067
1071
1068 curr = len(self.index) - 1
1072 curr = len(self.index) - 1
1069 if not self.inline:
1073 if not self.inline:
1070 transaction.add(self.data_file, offset)
1074 transaction.add(self.data_file, offset)
1071 if self.sidedata_file:
1075 if self.sidedata_file:
1072 transaction.add(self.sidedata_file, sidedata_offset)
1076 transaction.add(self.sidedata_file, sidedata_offset)
1073 transaction.add(self.canonical_index_file, curr * len(entry))
1077 transaction.add(self.canonical_index_file, curr * len(entry))
1074 if data[0]:
1078 if data[0]:
1075 dfh.write(data[0])
1079 dfh.write(data[0])
1076 dfh.write(data[1])
1080 dfh.write(data[1])
1077 if sidedata:
1081 if sidedata:
1078 sdfh.write(sidedata)
1082 sdfh.write(sidedata)
1079 if self._delay_buffer is None:
1083 if self._delay_buffer is None:
1080 ifh.write(entry)
1084 ifh.write(entry)
1081 else:
1085 else:
1082 self._delay_buffer.append(entry)
1086 self._delay_buffer.append(entry)
1083 else:
1087 else:
1084 offset += curr * self.index.entry_size
1088 offset += curr * self.index.entry_size
1085 transaction.add(self.canonical_index_file, offset)
1089 transaction.add(self.canonical_index_file, offset)
1086 assert not sidedata
1090 assert not sidedata
1087 if self._delay_buffer is None:
1091 if self._delay_buffer is None:
1088 ifh.write(entry)
1092 ifh.write(entry)
1089 ifh.write(data[0])
1093 ifh.write(data[0])
1090 ifh.write(data[1])
1094 ifh.write(data[1])
1091 else:
1095 else:
1092 self._delay_buffer.append(entry)
1096 self._delay_buffer.append(entry)
1093 self._delay_buffer.append(data[0])
1097 self._delay_buffer.append(data[0])
1094 self._delay_buffer.append(data[1])
1098 self._delay_buffer.append(data[1])
1095 return (
1099 return (
1096 ifh.tell(),
1100 ifh.tell(),
1097 dfh.tell() if dfh else None,
1101 dfh.tell() if dfh else None,
1098 sdfh.tell() if sdfh else None,
1102 sdfh.tell() if sdfh else None,
1099 )
1103 )
1100
1104
1101 def _divert_index(self):
1105 def _divert_index(self):
1102 return self.index_file + b'.a'
1106 return self.index_file + b'.a'
1103
1107
1104 def delay(self):
1108 def delay(self):
1105 assert not self.is_open
1109 assert not self.is_open
1106 if self._delay_buffer is not None or self._orig_index_file is not None:
1110 if self._delay_buffer is not None or self._orig_index_file is not None:
1107 # delay or divert already in place
1111 # delay or divert already in place
1108 return None
1112 return None
1109 elif len(self.index) == 0:
1113 elif len(self.index) == 0:
1110 self._orig_index_file = self.index_file
1114 self._orig_index_file = self.index_file
1111 self.index_file = self._divert_index()
1115 self.index_file = self._divert_index()
1112 self._segmentfile.filename = self.index_file
1116 self._segmentfile.filename = self.index_file
1113 assert self._orig_index_file is not None
1117 assert self._orig_index_file is not None
1114 assert self.index_file is not None
1118 assert self.index_file is not None
1115 if self.opener.exists(self.index_file):
1119 if self.opener.exists(self.index_file):
1116 self.opener.unlink(self.index_file)
1120 self.opener.unlink(self.index_file)
1117 return self.index_file
1121 return self.index_file
1118 else:
1122 else:
1119 self._segmentfile._delay_buffer = self._delay_buffer = []
1123 self._segmentfile._delay_buffer = self._delay_buffer = []
1120 return None
1124 return None
1121
1125
1122 def write_pending(self):
1126 def write_pending(self):
1123 assert not self.is_open
1127 assert not self.is_open
1124 if self._orig_index_file is not None:
1128 if self._orig_index_file is not None:
1125 return None, True
1129 return None, True
1126 any_pending = False
1130 any_pending = False
1127 pending_index_file = self._divert_index()
1131 pending_index_file = self._divert_index()
1128 if self.opener.exists(pending_index_file):
1132 if self.opener.exists(pending_index_file):
1129 self.opener.unlink(pending_index_file)
1133 self.opener.unlink(pending_index_file)
1130 util.copyfile(
1134 util.copyfile(
1131 self.opener.join(self.index_file),
1135 self.opener.join(self.index_file),
1132 self.opener.join(pending_index_file),
1136 self.opener.join(pending_index_file),
1133 )
1137 )
1134 if self._delay_buffer:
1138 if self._delay_buffer:
1135 with self.opener(pending_index_file, b'r+') as ifh:
1139 with self.opener(pending_index_file, b'r+') as ifh:
1136 ifh.seek(0, os.SEEK_END)
1140 ifh.seek(0, os.SEEK_END)
1137 ifh.write(b"".join(self._delay_buffer))
1141 ifh.write(b"".join(self._delay_buffer))
1138 any_pending = True
1142 any_pending = True
1139 self._segmentfile._delay_buffer = self._delay_buffer = None
1143 self._segmentfile._delay_buffer = self._delay_buffer = None
1140 self._orig_index_file = self.index_file
1144 self._orig_index_file = self.index_file
1141 self.index_file = pending_index_file
1145 self.index_file = pending_index_file
1142 self._segmentfile.filename = self.index_file
1146 self._segmentfile.filename = self.index_file
1143 return self.index_file, any_pending
1147 return self.index_file, any_pending
1144
1148
1145 def finalize_pending(self):
1149 def finalize_pending(self):
1146 assert not self.is_open
1150 assert not self.is_open
1147
1151
1148 delay = self._delay_buffer is not None
1152 delay = self._delay_buffer is not None
1149 divert = self._orig_index_file is not None
1153 divert = self._orig_index_file is not None
1150
1154
1151 if delay and divert:
1155 if delay and divert:
1152 assert False, "unreachable"
1156 assert False, "unreachable"
1153 elif delay:
1157 elif delay:
1154 if self._delay_buffer:
1158 if self._delay_buffer:
1155 with self.opener(self.index_file, b'r+') as ifh:
1159 with self.opener(self.index_file, b'r+') as ifh:
1156 ifh.seek(0, os.SEEK_END)
1160 ifh.seek(0, os.SEEK_END)
1157 ifh.write(b"".join(self._delay_buffer))
1161 ifh.write(b"".join(self._delay_buffer))
1158 self._segmentfile._delay_buffer = self._delay_buffer = None
1162 self._segmentfile._delay_buffer = self._delay_buffer = None
1159 elif divert:
1163 elif divert:
1160 if self.opener.exists(self.index_file):
1164 if self.opener.exists(self.index_file):
1161 self.opener.rename(
1165 self.opener.rename(
1162 self.index_file,
1166 self.index_file,
1163 self._orig_index_file,
1167 self._orig_index_file,
1164 checkambig=True,
1168 checkambig=True,
1165 )
1169 )
1166 self.index_file = self._orig_index_file
1170 self.index_file = self._orig_index_file
1167 self._orig_index_file = None
1171 self._orig_index_file = None
1168 self._segmentfile.filename = self.index_file
1172 self._segmentfile.filename = self.index_file
1169 else:
1173 else:
1170 msg = b"not delay or divert found on this revlog"
1174 msg = b"not delay or divert found on this revlog"
1171 raise error.ProgrammingError(msg)
1175 raise error.ProgrammingError(msg)
1172 return self.canonical_index_file
1176 return self.canonical_index_file
1173
1177
1174
1178
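
A hedged sketch of the call sequence a transaction is expected to drive through the delay/divert machinery above (the `inner` name is a hypothetical instance of this class):

    # Sketch only: expected lifecycle of the index write delaying above.
    inner.delay()              # start buffering (or diverting) index writes
    # ... revisions are appended; entries accumulate in _delay_buffer or
    # land in the diverted `<index>.a` file ...
    pending_file, any_pending = inner.write_pending()
    if any_pending:
        pass  # pre-commit hooks may now read the pending index file
    inner.finalize_pending()   # flush the buffer, or rename `.a` back
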
1175 class revlog:
1179 class revlog:
1176 """
1180 """
1177 the underlying revision storage object
1181 the underlying revision storage object
1178
1182
1179 A revlog consists of two parts, an index and the revision data.
1183 A revlog consists of two parts, an index and the revision data.
1180
1184
1181 The index is a file with a fixed record size containing
1185 The index is a file with a fixed record size containing
1182 information on each revision, including its nodeid (hash), the
1186 information on each revision, including its nodeid (hash), the
1183 nodeids of its parents, the position and offset of its data within
1187 nodeids of its parents, the position and offset of its data within
1184 the data file, and the revision it's based on. Finally, each entry
1188 the data file, and the revision it's based on. Finally, each entry
1185 contains a linkrev entry that can serve as a pointer to external
1189 contains a linkrev entry that can serve as a pointer to external
1186 data.
1190 data.
1187
1191
1188 The revision data itself is a linear collection of data chunks.
1192 The revision data itself is a linear collection of data chunks.
1189 Each chunk represents a revision and is usually represented as a
1193 Each chunk represents a revision and is usually represented as a
1190 delta against the previous chunk. To bound lookup time, runs of
1194 delta against the previous chunk. To bound lookup time, runs of
1191 deltas are limited to about 2 times the length of the original
1195 deltas are limited to about 2 times the length of the original
1192 version data. This makes retrieval of a version proportional to
1196 version data. This makes retrieval of a version proportional to
1193 its size, or O(1) relative to the number of revisions.
1197 its size, or O(1) relative to the number of revisions.
1194
1198
1195 Both pieces of the revlog are written to in an append-only
1199 Both pieces of the revlog are written to in an append-only
1196 fashion, which means we never need to rewrite a file to insert or
1200 fashion, which means we never need to rewrite a file to insert or
1197 remove data, and can use some simple techniques to avoid the need
1201 remove data, and can use some simple techniques to avoid the need
1198 for locking while reading.
1202 for locking while reading.
1199
1203
1200 If checkambig, indexfile is opened with checkambig=True at
1204 If checkambig, indexfile is opened with checkambig=True at
1201 writing, to avoid file stat ambiguity.
1205 writing, to avoid file stat ambiguity.
1202
1206
1203 If mmaplargeindex is True, and an mmapindexthreshold is set, the
1207 If mmaplargeindex is True, and an mmapindexthreshold is set, the
1204 index will be mmapped rather than read if it is larger than the
1208 index will be mmapped rather than read if it is larger than the
1205 configured threshold.
1209 configured threshold.
1206
1210
1207 If censorable is True, the revlog can have censored revisions.
1211 If censorable is True, the revlog can have censored revisions.
1208
1212
1209 If `upperboundcomp` is not None, this is the expected maximal gain from
1213 If `upperboundcomp` is not None, this is the expected maximal gain from
1210 compression for the data content.
1214 compression for the data content.
1211
1215
1212 `concurrencychecker` is an optional function that receives 3 arguments: a
1216 `concurrencychecker` is an optional function that receives 3 arguments: a
1213 file handle, a filename, and an expected position. It should check whether
1217 file handle, a filename, and an expected position. It should check whether
1214 the current position in the file handle is valid, and log/warn/fail (by
1218 the current position in the file handle is valid, and log/warn/fail (by
1215 raising).
1219 raising).
1216
1220
1217 See mercurial/revlogutils/constants.py for details about the content of an
1221 See mercurial/revlogutils/constants.py for details about the content of an
1218 index entry.
1222 index entry.
1219 """
1223 """
1220
1224
1221 _flagserrorclass = error.RevlogError
1225 _flagserrorclass = error.RevlogError
1222
1226
1223 @staticmethod
1227 @staticmethod
1224 def is_inline_index(header_bytes):
1228 def is_inline_index(header_bytes):
1225 """Determine if a revlog is inline from the initial bytes of the index"""
1229 """Determine if a revlog is inline from the initial bytes of the index"""
1226 header = INDEX_HEADER.unpack(header_bytes)[0]
1230 header = INDEX_HEADER.unpack(header_bytes)[0]
1227
1231
1228 _format_flags = header & ~0xFFFF
1232 _format_flags = header & ~0xFFFF
1229 _format_version = header & 0xFFFF
1233 _format_version = header & 0xFFFF
1230
1234
1231 features = FEATURES_BY_VERSION[_format_version]
1235 features = FEATURES_BY_VERSION[_format_version]
1232 return features[b'inline'](_format_flags)
1236 return features[b'inline'](_format_flags)
1233
1237
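
A worked example of the header split used above, relying on the v1 constants already imported in this module (REVLOGV1 is 1 and FLAG_INLINE_DATA is 1 << 16):

    # Sketch: 0x00010001 decodes as an inline version-1 revlog.
    header = 0x00010001
    format_flags = header & ~0xFFFF   # 0x00010000 -> FLAG_INLINE_DATA
    format_version = header & 0xFFFF  # 0x0001     -> REVLOGV1
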
1234 def __init__(
1238 def __init__(
1235 self,
1239 self,
1236 opener,
1240 opener,
1237 target,
1241 target,
1238 radix,
1242 radix,
1239 postfix=None, # only exist for `tmpcensored` now
1243 postfix=None, # only exist for `tmpcensored` now
1240 checkambig=False,
1244 checkambig=False,
1241 mmaplargeindex=False,
1245 mmaplargeindex=False,
1242 censorable=False,
1246 censorable=False,
1243 upperboundcomp=None,
1247 upperboundcomp=None,
1244 persistentnodemap=False,
1248 persistentnodemap=False,
1245 concurrencychecker=None,
1249 concurrencychecker=None,
1246 trypending=False,
1250 trypending=False,
1247 try_split=False,
1251 try_split=False,
1248 canonical_parent_order=True,
1252 canonical_parent_order=True,
1249 ):
1253 ):
1250 """
1254 """
1251 create a revlog object
1255 create a revlog object
1252
1256
1253 opener is a function that abstracts the file opening operation
1257 opener is a function that abstracts the file opening operation
1254 and can be used to implement COW semantics or the like.
1258 and can be used to implement COW semantics or the like.
1255
1259
1256 `target`: a (KIND, ID) tuple that identifies the content stored in
1260 `target`: a (KIND, ID) tuple that identifies the content stored in
1257 this revlog. It helps the rest of the code understand what the revlog
1261 this revlog. It helps the rest of the code understand what the revlog
1258 is about without having to resort to heuristics and index filename
1262 is about without having to resort to heuristics and index filename
1259 analysis. Note that this must reliably be set by normal code, but
1263 analysis. Note that this must reliably be set by normal code, but
1260 that test, debug, or performance measurement code might not set it to
1264 that test, debug, or performance measurement code might not set it to
1261 an accurate value.
1265 an accurate value.
1262 """
1266 """
1263
1267
1264 self.radix = radix
1268 self.radix = radix
1265
1269
1266 self._docket_file = None
1270 self._docket_file = None
1267 self._indexfile = None
1271 self._indexfile = None
1268 self._datafile = None
1272 self._datafile = None
1269 self._sidedatafile = None
1273 self._sidedatafile = None
1270 self._nodemap_file = None
1274 self._nodemap_file = None
1271 self.postfix = postfix
1275 self.postfix = postfix
1272 self._trypending = trypending
1276 self._trypending = trypending
1273 self._try_split = try_split
1277 self._try_split = try_split
1274 self.opener = opener
1278 self.opener = opener
1275 if persistentnodemap:
1279 if persistentnodemap:
1276 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1280 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1277
1281
1278 assert target[0] in ALL_KINDS
1282 assert target[0] in ALL_KINDS
1279 assert len(target) == 2
1283 assert len(target) == 2
1280 self.target = target
1284 self.target = target
1281 if b'feature-config' in self.opener.options:
1285 if b'feature-config' in self.opener.options:
1282 self.feature_config = self.opener.options[b'feature-config'].copy()
1286 self.feature_config = self.opener.options[b'feature-config'].copy()
1283 else:
1287 else:
1284 self.feature_config = FeatureConfig()
1288 self.feature_config = FeatureConfig()
1285 self.feature_config.censorable = censorable
1289 self.feature_config.censorable = censorable
1286 self.feature_config.canonical_parent_order = canonical_parent_order
1290 self.feature_config.canonical_parent_order = canonical_parent_order
1287 if b'data-config' in self.opener.options:
1291 if b'data-config' in self.opener.options:
1288 self.data_config = self.opener.options[b'data-config'].copy()
1292 self.data_config = self.opener.options[b'data-config'].copy()
1289 else:
1293 else:
1290 self.data_config = DataConfig()
1294 self.data_config = DataConfig()
1291 self.data_config.check_ambig = checkambig
1295 self.data_config.check_ambig = checkambig
1292 self.data_config.mmap_large_index = mmaplargeindex
1296 self.data_config.mmap_large_index = mmaplargeindex
1293 if b'delta-config' in self.opener.options:
1297 if b'delta-config' in self.opener.options:
1294 self.delta_config = self.opener.options[b'delta-config'].copy()
1298 self.delta_config = self.opener.options[b'delta-config'].copy()
1295 else:
1299 else:
1296 self.delta_config = DeltaConfig()
1300 self.delta_config = DeltaConfig()
1297 self.delta_config.upper_bound_comp = upperboundcomp
1301 self.delta_config.upper_bound_comp = upperboundcomp
1298
1302
1299 # Maps rev to chain base rev.
1303 # Maps rev to chain base rev.
1300 self._chainbasecache = util.lrucachedict(100)
1304 self._chainbasecache = util.lrucachedict(100)
1301
1305
1302 self.index = None
1306 self.index = None
1303 self._docket = None
1307 self._docket = None
1304 self._nodemap_docket = None
1308 self._nodemap_docket = None
1305 # Mapping of partial identifiers to full nodes.
1309 # Mapping of partial identifiers to full nodes.
1306 self._pcache = {}
1310 self._pcache = {}
1307
1311
1308 # other optional features
1312 # other optional features
1309
1313
1310 # Make copy of flag processors so each revlog instance can support
1314 # Make copy of flag processors so each revlog instance can support
1311 # custom flags.
1315 # custom flags.
1312 self._flagprocessors = dict(flagutil.flagprocessors)
1316 self._flagprocessors = dict(flagutil.flagprocessors)
1313 # prevent nesting of addgroup
1317 # prevent nesting of addgroup
1314 self._adding_group = None
1318 self._adding_group = None
1315
1319
1316 chunk_cache = self._loadindex()
1320 chunk_cache = self._loadindex()
1317 self._load_inner(chunk_cache)
1321 self._load_inner(chunk_cache)
1318 self._concurrencychecker = concurrencychecker
1322 self._concurrencychecker = concurrencychecker
1319
1323
1320 @property
1324 @property
1321 def _generaldelta(self):
1325 def _generaldelta(self):
1322 """temporary compatibility proxy"""
1326 """temporary compatibility proxy"""
1323 util.nouideprecwarn(
1327 util.nouideprecwarn(
1324 b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
1328 b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
1325 )
1329 )
1326 return self.delta_config.general_delta
1330 return self.delta_config.general_delta
1327
1331
1328 @property
1332 @property
1329 def _checkambig(self):
1333 def _checkambig(self):
1330 """temporary compatibility proxy"""
1334 """temporary compatibility proxy"""
1331 util.nouideprecwarn(
1335 util.nouideprecwarn(
1332 b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
1336 b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
1333 )
1337 )
1334 return self.data_config.check_ambig
1338 return self.data_config.check_ambig
1335
1339
1336 @property
1340 @property
1337 def _mmaplargeindex(self):
1341 def _mmaplargeindex(self):
1338 """temporary compatibility proxy"""
1342 """temporary compatibility proxy"""
1339 util.nouideprecwarn(
1343 util.nouideprecwarn(
1340 b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
1344 b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
1341 )
1345 )
1342 return self.data_config.mmap_large_index
1346 return self.data_config.mmap_large_index
1343
1347
1344 @property
1348 @property
1345 def _censorable(self):
1349 def _censorable(self):
1346 """temporary compatibility proxy"""
1350 """temporary compatibility proxy"""
1347 util.nouideprecwarn(
1351 util.nouideprecwarn(
1348 b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
1352 b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
1349 )
1353 )
1350 return self.feature_config.censorable
1354 return self.feature_config.censorable
1351
1355
1352 @property
1356 @property
1353 def _chunkcachesize(self):
1357 def _chunkcachesize(self):
1354 """temporary compatibility proxy"""
1358 """temporary compatibility proxy"""
1355 util.nouideprecwarn(
1359 util.nouideprecwarn(
1356 b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
1360 b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
1357 )
1361 )
1358 return self.data_config.chunk_cache_size
1362 return self.data_config.chunk_cache_size
1359
1363
1360 @property
1364 @property
1361 def _maxchainlen(self):
1365 def _maxchainlen(self):
1362 """temporary compatibility proxy"""
1366 """temporary compatibility proxy"""
1363 util.nouideprecwarn(
1367 util.nouideprecwarn(
1364 b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
1368 b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
1365 )
1369 )
1366 return self.delta_config.max_chain_len
1370 return self.delta_config.max_chain_len
1367
1371
1368 @property
1372 @property
1369 def _deltabothparents(self):
1373 def _deltabothparents(self):
1370 """temporary compatibility proxy"""
1374 """temporary compatibility proxy"""
1371 util.nouideprecwarn(
1375 util.nouideprecwarn(
1372 b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
1376 b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
1373 )
1377 )
1374 return self.delta_config.delta_both_parents
1378 return self.delta_config.delta_both_parents
1375
1379
1376 @property
1380 @property
1377 def _candidate_group_chunk_size(self):
1381 def _candidate_group_chunk_size(self):
1378 """temporary compatibility proxy"""
1382 """temporary compatibility proxy"""
1379 util.nouideprecwarn(
1383 util.nouideprecwarn(
1380 b"use revlog.delta_config.candidate_group_chunk_size",
1384 b"use revlog.delta_config.candidate_group_chunk_size",
1381 b"6.6",
1385 b"6.6",
1382 stacklevel=2,
1386 stacklevel=2,
1383 )
1387 )
1384 return self.delta_config.candidate_group_chunk_size
1388 return self.delta_config.candidate_group_chunk_size
1385
1389
1386 @property
1390 @property
1387 def _debug_delta(self):
1391 def _debug_delta(self):
1388 """temporary compatibility proxy"""
1392 """temporary compatibility proxy"""
1389 util.nouideprecwarn(
1393 util.nouideprecwarn(
1390 b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
1394 b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
1391 )
1395 )
1392 return self.delta_config.debug_delta
1396 return self.delta_config.debug_delta
1393
1397
1394 @property
1398 @property
1395 def _compengine(self):
1399 def _compengine(self):
1396 """temporary compatibility proxy"""
1400 """temporary compatibility proxy"""
1397 util.nouideprecwarn(
1401 util.nouideprecwarn(
1398 b"use revlog.feature_config.compression_engine",
1402 b"use revlog.feature_config.compression_engine",
1399 b"6.6",
1403 b"6.6",
1400 stacklevel=2,
1404 stacklevel=2,
1401 )
1405 )
1402 return self.feature_config.compression_engine
1406 return self.feature_config.compression_engine
1403
1407
1404 @property
1408 @property
1405 def upperboundcomp(self):
1409 def upperboundcomp(self):
1406 """temporary compatibility proxy"""
1410 """temporary compatibility proxy"""
1407 util.nouideprecwarn(
1411 util.nouideprecwarn(
1408 b"use revlog.delta_config.upper_bound_comp",
1412 b"use revlog.delta_config.upper_bound_comp",
1409 b"6.6",
1413 b"6.6",
1410 stacklevel=2,
1414 stacklevel=2,
1411 )
1415 )
1412 return self.delta_config.upper_bound_comp
1416 return self.delta_config.upper_bound_comp
1413
1417
1414 @property
1418 @property
1415 def _compengineopts(self):
1419 def _compengineopts(self):
1416 """temporary compatibility proxy"""
1420 """temporary compatibility proxy"""
1417 util.nouideprecwarn(
1421 util.nouideprecwarn(
1418 b"use revlog.feature_config.compression_engine_options",
1422 b"use revlog.feature_config.compression_engine_options",
1419 b"6.6",
1423 b"6.6",
1420 stacklevel=2,
1424 stacklevel=2,
1421 )
1425 )
1422 return self.feature_config.compression_engine_options
1426 return self.feature_config.compression_engine_options
1423
1427
1424 @property
1428 @property
1425 def _maxdeltachainspan(self):
1429 def _maxdeltachainspan(self):
1426 """temporary compatibility proxy"""
1430 """temporary compatibility proxy"""
1427 util.nouideprecwarn(
1431 util.nouideprecwarn(
1428 b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
1432 b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
1429 )
1433 )
1430 return self.delta_config.max_deltachain_span
1434 return self.delta_config.max_deltachain_span
1431
1435
1432 @property
1436 @property
1433 def _withsparseread(self):
1437 def _withsparseread(self):
1434 """temporary compatibility proxy"""
1438 """temporary compatibility proxy"""
1435 util.nouideprecwarn(
1439 util.nouideprecwarn(
1436 b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
1440 b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
1437 )
1441 )
1438 return self.data_config.with_sparse_read
1442 return self.data_config.with_sparse_read
1439
1443
1440 @property
1444 @property
1441 def _sparserevlog(self):
1445 def _sparserevlog(self):
1442 """temporary compatibility proxy"""
1446 """temporary compatibility proxy"""
1443 util.nouideprecwarn(
1447 util.nouideprecwarn(
1444 b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
1448 b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
1445 )
1449 )
1446 return self.delta_config.sparse_revlog
1450 return self.delta_config.sparse_revlog
1447
1451
1448 @property
1452 @property
1449 def hassidedata(self):
1453 def hassidedata(self):
1450 """temporary compatibility proxy"""
1454 """temporary compatibility proxy"""
1451 util.nouideprecwarn(
1455 util.nouideprecwarn(
1452 b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
1456 b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
1453 )
1457 )
1454 return self.feature_config.has_side_data
1458 return self.feature_config.has_side_data
1455
1459
1456 @property
1460 @property
1457 def _srdensitythreshold(self):
1461 def _srdensitythreshold(self):
1458 """temporary compatibility proxy"""
1462 """temporary compatibility proxy"""
1459 util.nouideprecwarn(
1463 util.nouideprecwarn(
1460 b"use revlog.data_config.sr_density_threshold",
1464 b"use revlog.data_config.sr_density_threshold",
1461 b"6.6",
1465 b"6.6",
1462 stacklevel=2,
1466 stacklevel=2,
1463 )
1467 )
1464 return self.data_config.sr_density_threshold
1468 return self.data_config.sr_density_threshold
1465
1469
1466 @property
1470 @property
1467 def _srmingapsize(self):
1471 def _srmingapsize(self):
1468 """temporary compatibility proxy"""
1472 """temporary compatibility proxy"""
1469 util.nouideprecwarn(
1473 util.nouideprecwarn(
1470 b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
1474 b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
1471 )
1475 )
1472 return self.data_config.sr_min_gap_size
1476 return self.data_config.sr_min_gap_size
1473
1477
1474 @property
1478 @property
1475 def _compute_rank(self):
1479 def _compute_rank(self):
1476 """temporary compatibility proxy"""
1480 """temporary compatibility proxy"""
1477 util.nouideprecwarn(
1481 util.nouideprecwarn(
1478 b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
1482 b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
1479 )
1483 )
1480 return self.feature_config.compute_rank
1484 return self.feature_config.compute_rank
1481
1485
1482 @property
1486 @property
1483 def canonical_parent_order(self):
1487 def canonical_parent_order(self):
1484 """temporary compatibility proxy"""
1488 """temporary compatibility proxy"""
1485 util.nouideprecwarn(
1489 util.nouideprecwarn(
1486 b"use revlog.feature_config.canonical_parent_order",
1490 b"use revlog.feature_config.canonical_parent_order",
1487 b"6.6",
1491 b"6.6",
1488 stacklevel=2,
1492 stacklevel=2,
1489 )
1493 )
1490 return self.feature_config.canonical_parent_order
1494 return self.feature_config.canonical_parent_order
1491
1495
1492 @property
1496 @property
1493 def _lazydelta(self):
1497 def _lazydelta(self):
1494 """temporary compatibility proxy"""
1498 """temporary compatibility proxy"""
1495 util.nouideprecwarn(
1499 util.nouideprecwarn(
1496 b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
1500 b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
1497 )
1501 )
1498 return self.delta_config.lazy_delta
1502 return self.delta_config.lazy_delta
1499
1503
1500 @property
1504 @property
1501 def _lazydeltabase(self):
1505 def _lazydeltabase(self):
1502 """temporary compatibility proxy"""
1506 """temporary compatibility proxy"""
1503 util.nouideprecwarn(
1507 util.nouideprecwarn(
1504 b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
1508 b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
1505 )
1509 )
1506 return self.delta_config.lazy_delta_base
1510 return self.delta_config.lazy_delta_base
1507
1511
1508 def _init_opts(self):
1512 def _init_opts(self):
1509 """process options (from above/config) to setup associated default revlog mode
1513 """process options (from above/config) to setup associated default revlog mode
1510
1514
1511 These values might be affected when actually reading on disk information.
1515 These values might be affected when actually reading on disk information.
1512
1516
1513 The relevant values are returned for use in _loadindex().
1517 The relevant values are returned for use in _loadindex().
1514
1518
1515 * newversionflags:
1519 * newversionflags:
1516 version header to use if we need to create a new revlog
1520 version header to use if we need to create a new revlog
1517
1521
1518 * mmapindexthreshold:
1522 * mmapindexthreshold:
1519 minimal index size at which to start using mmap
1523 minimal index size at which to start using mmap
1520
1524
1521 * force_nodemap:
1525 * force_nodemap:
1522 force the usage of a "development" version of the nodemap code
1526 force the usage of a "development" version of the nodemap code
1523 """
1527 """
1524 opts = self.opener.options
1528 opts = self.opener.options
1525
1529
1526 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1530 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1527 new_header = CHANGELOGV2
1531 new_header = CHANGELOGV2
1528 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1532 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1529 self.feature_config.compute_rank = compute_rank
1533 self.feature_config.compute_rank = compute_rank
1530 elif b'revlogv2' in opts:
1534 elif b'revlogv2' in opts:
1531 new_header = REVLOGV2
1535 new_header = REVLOGV2
1532 elif b'revlogv1' in opts:
1536 elif b'revlogv1' in opts:
1533 new_header = REVLOGV1 | FLAG_INLINE_DATA
1537 new_header = REVLOGV1 | FLAG_INLINE_DATA
1534 if b'generaldelta' in opts:
1538 if b'generaldelta' in opts:
1535 new_header |= FLAG_GENERALDELTA
1539 new_header |= FLAG_GENERALDELTA
1536 elif b'revlogv0' in self.opener.options:
1540 elif b'revlogv0' in self.opener.options:
1537 new_header = REVLOGV0
1541 new_header = REVLOGV0
1538 else:
1542 else:
1539 new_header = REVLOG_DEFAULT_VERSION
1543 new_header = REVLOG_DEFAULT_VERSION
1540
1544
1541 mmapindexthreshold = None
1545 mmapindexthreshold = None
1542 if self.data_config.mmap_large_index:
1546 if self.data_config.mmap_large_index:
1543 mmapindexthreshold = self.data_config.mmap_index_threshold
1547 mmapindexthreshold = self.data_config.mmap_index_threshold
1544 if self.feature_config.enable_ellipsis:
1548 if self.feature_config.enable_ellipsis:
1545 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1549 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1546
1550
1547 # revlog v0 doesn't have flag processors
1551 # revlog v0 doesn't have flag processors
1548 for flag, processor in opts.get(b'flagprocessors', {}).items():
1552 for flag, processor in opts.get(b'flagprocessors', {}).items():
1549 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1553 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1550
1554
1551 chunk_cache_size = self.data_config.chunk_cache_size
1555 chunk_cache_size = self.data_config.chunk_cache_size
1552 if chunk_cache_size <= 0:
1556 if chunk_cache_size <= 0:
1553 raise error.RevlogError(
1557 raise error.RevlogError(
1554 _(b'revlog chunk cache size %r is not greater than 0')
1558 _(b'revlog chunk cache size %r is not greater than 0')
1555 % chunk_cache_size
1559 % chunk_cache_size
1556 )
1560 )
1557 elif chunk_cache_size & (chunk_cache_size - 1):
1561 elif chunk_cache_size & (chunk_cache_size - 1):
1558 raise error.RevlogError(
1562 raise error.RevlogError(
1559 _(b'revlog chunk cache size %r is not a power of 2')
1563 _(b'revlog chunk cache size %r is not a power of 2')
1560 % chunk_cache_size
1564 % chunk_cache_size
1561 )
1565 )
1562 force_nodemap = opts.get(b'devel-force-nodemap', False)
1566 force_nodemap = opts.get(b'devel-force-nodemap', False)
1563 return new_header, mmapindexthreshold, force_nodemap
1567 return new_header, mmapindexthreshold, force_nodemap
1564
1568
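
The chunk cache validation above uses the classic bit trick: for x > 0, `x & (x - 1)` is zero exactly when x is a power of two, because a power of two has a single bit set and subtracting one flips every bit below it. A small self-contained check:

    def is_power_of_two(x):
        # a power of two has exactly one bit set; x - 1 flips all the bits
        # below it, so the AND is zero only in that case
        return x > 0 and (x & (x - 1)) == 0

    assert is_power_of_two(65536)
    assert not is_power_of_two(65535)
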
1565 def _get_data(self, filepath, mmap_threshold, size=None):
1569 def _get_data(self, filepath, mmap_threshold, size=None):
1566 """return a file content with or without mmap
1570 """return a file content with or without mmap
1567
1571
1568 If the file is missing return the empty string"""
1572 If the file is missing return the empty string"""
1569 try:
1573 try:
1570 with self.opener(filepath) as fp:
1574 with self.opener(filepath) as fp:
1571 if mmap_threshold is not None:
1575 if mmap_threshold is not None:
1572 file_size = self.opener.fstat(fp).st_size
1576 file_size = self.opener.fstat(fp).st_size
1573 if file_size >= mmap_threshold:
1577 if file_size >= mmap_threshold:
1574 if size is not None:
1578 if size is not None:
1575 # avoid potential mmap crash
1579 # avoid potential mmap crash
1576 size = min(file_size, size)
1580 size = min(file_size, size)
1577 # TODO: should .close() to release resources without
1581 # TODO: should .close() to release resources without
1578 # relying on Python GC
1582 # relying on Python GC
1579 if size is None:
1583 if size is None:
1580 return util.buffer(util.mmapread(fp))
1584 return util.buffer(util.mmapread(fp))
1581 else:
1585 else:
1582 return util.buffer(util.mmapread(fp, size))
1586 return util.buffer(util.mmapread(fp, size))
1583 if size is None:
1587 if size is None:
1584 return fp.read()
1588 return fp.read()
1585 else:
1589 else:
1586 return fp.read(size)
1590 return fp.read(size)
1587 except FileNotFoundError:
1591 except FileNotFoundError:
1588 return b''
1592 return b''
1589
1593
1590 def get_streams(self, max_linkrev, force_inline=False):
1594 def get_streams(self, max_linkrev, force_inline=False):
1591 """return a list of streams that represent this revlog
1595 """return a list of streams that represent this revlog
1592
1596
1593 This is used by stream-clone to do bytes to bytes copies of a repository.
1597 This is used by stream-clone to do bytes to bytes copies of a repository.
1594
1598
1595 This streams data for all revisions that refer to a changelog revision up
1599 This streams data for all revisions that refer to a changelog revision up
1596 to `max_linkrev`.
1600 to `max_linkrev`.
1597
1601
1598 If `force_inline` is set, the returned stream is guaranteed to represent an inline revlog.
1602 If `force_inline` is set, the returned stream is guaranteed to represent an inline revlog.
1599
1603
1600 It returns a list of three-tuples:
1604 It returns a list of three-tuples:
1601
1605
1602 [
1606 [
1603 (filename, bytes_stream, stream_size),
1607 (filename, bytes_stream, stream_size),
1604 …
1608 …
1605 ]
1609 ]
1606 """
1610 """
1607 n = len(self)
1611 n = len(self)
1608 index = self.index
1612 index = self.index
1609 while n > 0:
1613 while n > 0:
1610 linkrev = index[n - 1][4]
1614 linkrev = index[n - 1][4]
1611 if linkrev < max_linkrev:
1615 if linkrev < max_linkrev:
1612 break
1616 break
1613 # note: this loop will rarely go through multiple iterations, since
1617 # note: this loop will rarely go through multiple iterations, since
1614 # it only traverses commits created during the current streaming
1618 # it only traverses commits created during the current streaming
1615 # pull operation.
1619 # pull operation.
1616 #
1620 #
1617 # If this becomes a problem, using a binary search should cap the
1621 # If this becomes a problem, using a binary search should cap the
1618 # runtime of this.
1622 # runtime of this.
1619 n = n - 1
1623 n = n - 1
1620 if n == 0:
1624 if n == 0:
1621 # no data to send
1625 # no data to send
1622 return []
1626 return []
1623 index_size = n * index.entry_size
1627 index_size = n * index.entry_size
1624 data_size = self.end(n - 1)
1628 data_size = self.end(n - 1)
1625
1629
1626 # XXX we might have been split (or stripped) since the object
1630 # XXX we might have been split (or stripped) since the object
1627 # initialization. We need to close this race too, e.g. by having a way
1631 # initialization. We need to close this race too, e.g. by having a way
1628 # to pre-open the files we feed to the revlog and never closing them
1632 # to pre-open the files we feed to the revlog and never closing them
1629 # before we are done streaming.
1633 # before we are done streaming.
1630
1634
1631 if self._inline:
1635 if self._inline:
1632
1636
1633 def get_stream():
1637 def get_stream():
1634 with self.opener(self._indexfile, mode=b"r") as fp:
1638 with self.opener(self._indexfile, mode=b"r") as fp:
1635 yield None
1639 yield None
1636 size = index_size + data_size
1640 size = index_size + data_size
1637 if size <= 65536:
1641 if size <= 65536:
1638 yield fp.read(size)
1642 yield fp.read(size)
1639 else:
1643 else:
1640 yield from util.filechunkiter(fp, limit=size)
1644 yield from util.filechunkiter(fp, limit=size)
1641
1645
1642 inline_stream = get_stream()
1646 inline_stream = get_stream()
1643 next(inline_stream)
1647 next(inline_stream)
1644 return [
1648 return [
1645 (self._indexfile, inline_stream, index_size + data_size),
1649 (self._indexfile, inline_stream, index_size + data_size),
1646 ]
1650 ]
1647 elif force_inline:
1651 elif force_inline:
1648
1652
1649 def get_stream():
1653 def get_stream():
1650 with self.reading():
1654 with self.reading():
1651 yield None
1655 yield None
1652
1656
1653 for rev in range(n):
1657 for rev in range(n):
1654 idx = self.index.entry_binary(rev)
1658 idx = self.index.entry_binary(rev)
1655 if rev == 0 and self._docket is None:
1659 if rev == 0 and self._docket is None:
1656 # re-inject the inline flag
1660 # re-inject the inline flag
1657 header = self._format_flags
1661 header = self._format_flags
1658 header |= self._format_version
1662 header |= self._format_version
1659 header |= FLAG_INLINE_DATA
1663 header |= FLAG_INLINE_DATA
1660 header = self.index.pack_header(header)
1664 header = self.index.pack_header(header)
1661 idx = header + idx
1665 idx = header + idx
1662 yield idx
1666 yield idx
1663 yield self._inner.get_segment_for_revs(rev, rev)[1]
1667 yield self._inner.get_segment_for_revs(rev, rev)[1]
1664
1668
1665 inline_stream = get_stream()
1669 inline_stream = get_stream()
1666 next(inline_stream)
1670 next(inline_stream)
1667 return [
1671 return [
1668 (self._indexfile, inline_stream, index_size + data_size),
1672 (self._indexfile, inline_stream, index_size + data_size),
1669 ]
1673 ]
1670 else:
1674 else:
1671
1675
1672 def get_index_stream():
1676 def get_index_stream():
1673 with self.opener(self._indexfile, mode=b"r") as fp:
1677 with self.opener(self._indexfile, mode=b"r") as fp:
1674 yield None
1678 yield None
1675 if index_size <= 65536:
1679 if index_size <= 65536:
1676 yield fp.read(index_size)
1680 yield fp.read(index_size)
1677 else:
1681 else:
1678 yield from util.filechunkiter(fp, limit=index_size)
1682 yield from util.filechunkiter(fp, limit=index_size)
1679
1683
1680 def get_data_stream():
1684 def get_data_stream():
1681 with self._datafp() as fp:
1685 with self._datafp() as fp:
1682 yield None
1686 yield None
1683 if data_size <= 65536:
1687 if data_size <= 65536:
1684 yield fp.read(data_size)
1688 yield fp.read(data_size)
1685 else:
1689 else:
1686 yield from util.filechunkiter(fp, limit=data_size)
1690 yield from util.filechunkiter(fp, limit=data_size)
1687
1691
1688 index_stream = get_index_stream()
1692 index_stream = get_index_stream()
1689 next(index_stream)
1693 next(index_stream)
1690 data_stream = get_data_stream()
1694 data_stream = get_data_stream()
1691 next(data_stream)
1695 next(data_stream)
1692 return [
1696 return [
1693 (self._datafile, data_stream, data_size),
1697 (self._datafile, data_stream, data_size),
1694 (self._indexfile, index_stream, index_size),
1698 (self._indexfile, index_stream, index_size),
1695 ]
1699 ]
1696
1700
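
A hedged sketch of how a stream-clone consumer might drain the return value above (`rl` and `dest_opener` are hypothetical names, not part of this module):

    # Sketch only: copy each advertised stream byte for byte, then check
    # that the advertised size matches what was actually produced.
    for filename, stream, size in rl.get_streams(max_linkrev):
        written = 0
        with dest_opener(filename, b'wb') as out:
            for chunk in stream:
                written += len(chunk)
                out.write(chunk)
        assert written == size
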
1697 def _loadindex(self, docket=None):
1701 def _loadindex(self, docket=None):
1698
1702
1699 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1703 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1700
1704
1701 if self.postfix is not None:
1705 if self.postfix is not None:
1702 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1706 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1703 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1707 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1704 entry_point = b'%s.i.a' % self.radix
1708 entry_point = b'%s.i.a' % self.radix
1705 elif self._try_split and self.opener.exists(self._split_index_file):
1709 elif self._try_split and self.opener.exists(self._split_index_file):
1706 entry_point = self._split_index_file
1710 entry_point = self._split_index_file
1707 else:
1711 else:
1708 entry_point = b'%s.i' % self.radix
1712 entry_point = b'%s.i' % self.radix
1709
1713
1710 if docket is not None:
1714 if docket is not None:
1711 self._docket = docket
1715 self._docket = docket
1712 self._docket_file = entry_point
1716 self._docket_file = entry_point
1713 else:
1717 else:
1714 self._initempty = True
1718 self._initempty = True
1715 entry_data = self._get_data(entry_point, mmapindexthreshold)
1719 entry_data = self._get_data(entry_point, mmapindexthreshold)
1716 if len(entry_data) > 0:
1720 if len(entry_data) > 0:
1717 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1721 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1718 self._initempty = False
1722 self._initempty = False
1719 else:
1723 else:
1720 header = new_header
1724 header = new_header
1721
1725
1722 self._format_flags = header & ~0xFFFF
1726 self._format_flags = header & ~0xFFFF
1723 self._format_version = header & 0xFFFF
1727 self._format_version = header & 0xFFFF
1724
1728
1725 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1729 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1726 if supported_flags is None:
1730 if supported_flags is None:
1727 msg = _(b'unknown version (%d) in revlog %s')
1731 msg = _(b'unknown version (%d) in revlog %s')
1728 msg %= (self._format_version, self.display_id)
1732 msg %= (self._format_version, self.display_id)
1729 raise error.RevlogError(msg)
1733 raise error.RevlogError(msg)
1730 elif self._format_flags & ~supported_flags:
1734 elif self._format_flags & ~supported_flags:
1731 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1735 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1732 display_flag = self._format_flags >> 16
1736 display_flag = self._format_flags >> 16
1733 msg %= (display_flag, self._format_version, self.display_id)
1737 msg %= (display_flag, self._format_version, self.display_id)
1734 raise error.RevlogError(msg)
1738 raise error.RevlogError(msg)
1735
1739
1736 features = FEATURES_BY_VERSION[self._format_version]
1740 features = FEATURES_BY_VERSION[self._format_version]
1737 self._inline = features[b'inline'](self._format_flags)
1741 self._inline = features[b'inline'](self._format_flags)
1738 self.delta_config.general_delta = features[b'generaldelta'](
1742 self.delta_config.general_delta = features[b'generaldelta'](
1739 self._format_flags
1743 self._format_flags
1740 )
1744 )
1741 self.feature_config.has_side_data = features[b'sidedata']
1745 self.feature_config.has_side_data = features[b'sidedata']
1742
1746
1743 if not features[b'docket']:
1747 if not features[b'docket']:
1744 self._indexfile = entry_point
1748 self._indexfile = entry_point
1745 index_data = entry_data
1749 index_data = entry_data
1746 else:
1750 else:
1747 self._docket_file = entry_point
1751 self._docket_file = entry_point
1748 if self._initempty:
1752 if self._initempty:
1749 self._docket = docketutil.default_docket(self, header)
1753 self._docket = docketutil.default_docket(self, header)
1750 else:
1754 else:
1751 self._docket = docketutil.parse_docket(
1755 self._docket = docketutil.parse_docket(
1752 self, entry_data, use_pending=self._trypending
1756 self, entry_data, use_pending=self._trypending
1753 )
1757 )
1754
1758
1755 if self._docket is not None:
1759 if self._docket is not None:
1756 self._indexfile = self._docket.index_filepath()
1760 self._indexfile = self._docket.index_filepath()
1757 index_data = b''
1761 index_data = b''
1758 index_size = self._docket.index_end
1762 index_size = self._docket.index_end
1759 if index_size > 0:
1763 if index_size > 0:
1760 index_data = self._get_data(
1764 index_data = self._get_data(
1761 self._indexfile, mmapindexthreshold, size=index_size
1765 self._indexfile, mmapindexthreshold, size=index_size
1762 )
1766 )
1763 if len(index_data) < index_size:
1767 if len(index_data) < index_size:
1764 msg = _(b'not enough index data for %s: got %d, expected %d')
1768 msg = _(b'not enough index data for %s: got %d, expected %d')
1765 msg %= (self.display_id, len(index_data), index_size)
1769 msg %= (self.display_id, len(index_data), index_size)
1766 raise error.RevlogError(msg)
1770 raise error.RevlogError(msg)
1767
1771
1768 self._inline = False
1772 self._inline = False
1769 # generaldelta implied by version 2 revlogs.
1773 # generaldelta implied by version 2 revlogs.
1770 self.delta_config.general_delta = True
1774 self.delta_config.general_delta = True
1771 # the logic for persistent nodemap will be dealt with within the
1775 # the logic for persistent nodemap will be dealt with within the
1772 # main docket, so disable it for now.
1776 # main docket, so disable it for now.
1773 self._nodemap_file = None
1777 self._nodemap_file = None
1774
1778
1775 if self._docket is not None:
1779 if self._docket is not None:
1776 self._datafile = self._docket.data_filepath()
1780 self._datafile = self._docket.data_filepath()
1777 self._sidedatafile = self._docket.sidedata_filepath()
1781 self._sidedatafile = self._docket.sidedata_filepath()
1778 elif self.postfix is None:
1782 elif self.postfix is None:
1779 self._datafile = b'%s.d' % self.radix
1783 self._datafile = b'%s.d' % self.radix
1780 else:
1784 else:
1781 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1785 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1782
1786
1783 self.nodeconstants = sha1nodeconstants
1787 self.nodeconstants = sha1nodeconstants
1784 self.nullid = self.nodeconstants.nullid
1788 self.nullid = self.nodeconstants.nullid
1785
1789
1786 # sparse-revlog can't be on without general-delta (issue6056)
1790 # sparse-revlog can't be on without general-delta (issue6056)
1787 if not self.delta_config.general_delta:
1791 if not self.delta_config.general_delta:
1788 self.delta_config.sparse_revlog = False
1792 self.delta_config.sparse_revlog = False
1789
1793
1790 self._storedeltachains = True
1794 self._storedeltachains = True
1791
1795
1792 devel_nodemap = (
1796 devel_nodemap = (
1793 self._nodemap_file
1797 self._nodemap_file
1794 and force_nodemap
1798 and force_nodemap
1795 and parse_index_v1_nodemap is not None
1799 and parse_index_v1_nodemap is not None
1796 )
1800 )
1797
1801
1798 use_rust_index = False
1802 use_rust_index = False
1799 if rustrevlog is not None:
1803 if rustrevlog is not None:
1800 if self._nodemap_file is not None:
1804 if self._nodemap_file is not None:
1801 use_rust_index = True
1805 use_rust_index = True
1802 else:
1806 else:
1803 use_rust_index = self.opener.options.get(b'rust.index')
1807 use_rust_index = self.opener.options.get(b'rust.index')
1804
1808
1805 self._parse_index = parse_index_v1
1809 self._parse_index = parse_index_v1
1806 if self._format_version == REVLOGV0:
1810 if self._format_version == REVLOGV0:
1807 self._parse_index = revlogv0.parse_index_v0
1811 self._parse_index = revlogv0.parse_index_v0
1808 elif self._format_version == REVLOGV2:
1812 elif self._format_version == REVLOGV2:
1809 self._parse_index = parse_index_v2
1813 self._parse_index = parse_index_v2
1810 elif self._format_version == CHANGELOGV2:
1814 elif self._format_version == CHANGELOGV2:
1811 self._parse_index = parse_index_cl_v2
1815 self._parse_index = parse_index_cl_v2
1812 elif devel_nodemap:
1816 elif devel_nodemap:
1813 self._parse_index = parse_index_v1_nodemap
1817 self._parse_index = parse_index_v1_nodemap
1814 elif use_rust_index:
1818 elif use_rust_index:
1815 self._parse_index = parse_index_v1_mixed
1819 self._parse_index = parse_index_v1_mixed
1816 try:
1820 try:
1817 d = self._parse_index(index_data, self._inline)
1821 d = self._parse_index(index_data, self._inline)
1818 index, chunkcache = d
1822 index, chunkcache = d
1819 use_nodemap = (
1823 use_nodemap = (
1820 not self._inline
1824 not self._inline
1821 and self._nodemap_file is not None
1825 and self._nodemap_file is not None
1822 and hasattr(index, 'update_nodemap_data')
1826 and hasattr(index, 'update_nodemap_data')
1823 )
1827 )
1824 if use_nodemap:
1828 if use_nodemap:
1825 nodemap_data = nodemaputil.persisted_data(self)
1829 nodemap_data = nodemaputil.persisted_data(self)
1826 if nodemap_data is not None:
1830 if nodemap_data is not None:
1827 docket = nodemap_data[0]
1831 docket = nodemap_data[0]
1828 if (
1832 if (
1829 len(d[0]) > docket.tip_rev
1833 len(d[0]) > docket.tip_rev
1830 and d[0][docket.tip_rev][7] == docket.tip_node
1834 and d[0][docket.tip_rev][7] == docket.tip_node
1831 ):
1835 ):
1832 # no changelog tampering
1836 # no changelog tampering
1833 self._nodemap_docket = docket
1837 self._nodemap_docket = docket
1834 index.update_nodemap_data(*nodemap_data)
1838 index.update_nodemap_data(*nodemap_data)
1835 except (ValueError, IndexError):
1839 except (ValueError, IndexError):
1836 raise error.RevlogError(
1840 raise error.RevlogError(
1837 _(b"index %s is corrupted") % self.display_id
1841 _(b"index %s is corrupted") % self.display_id
1838 )
1842 )
1839 self.index = index
1843 self.index = index
1840 # revnum -> (chain-length, sum-delta-length)
1844 # revnum -> (chain-length, sum-delta-length)
1841 self._chaininfocache = util.lrucachedict(500)
1845 self._chaininfocache = util.lrucachedict(500)
1842
1846
1843 return chunkcache
1847 return chunkcache
1844
1848
    def _load_inner(self, chunk_cache):
        if self._docket is None:
            default_compression_header = None
        else:
            default_compression_header = self._docket.default_compression_header

        self._inner = _InnerRevlog(
            opener=self.opener,
            index=self.index,
            index_file=self._indexfile,
            data_file=self._datafile,
            sidedata_file=self._sidedatafile,
            inline=self._inline,
            data_config=self.data_config,
            delta_config=self.delta_config,
            feature_config=self.feature_config,
            chunk_cache=chunk_cache,
            default_compression_header=default_compression_header,
        )

    def get_revlog(self):
        """simple function to mirror the API of other not-really-revlog objects"""
        return self

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        if self.revlog_kind == KIND_FILELOG:
            # Reference the file without the "data/" prefix, so it is familiar
            # to the user.
            return self.target[1]
        else:
            return self.radix

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def _candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (e.g. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        """update on-disk caches

        If a transaction is passed, the update may be delayed to transaction
        commit."""
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        """Clear in-memory caches"""
        self._chainbasecache.clear()
        self._inner.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The Python code is the one responsible for validating the docket;
        # we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and hasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        """return the revision number associated with a <nodeid>"""
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

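    # Illustrative sketch (for a hypothetical revlog `rl`): the first entry
    # of an index tuple packs the data-file offset and the flags together:
    #
    #   offset_flags = rl.index[rev][0]
    #   assert rl.start(rev) == offset_flags >> 16
    #   assert rl.flags(rev) == offset_flags & 0xFFFF
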
    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset (instead of previous-offset +
        # previous-size).
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

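    # Illustrative example: if a revision carries no sidedata, its entry
    # keeps offset 0 instead of previous-offset + previous-size, so the loop
    # in sidedata_cut_off() walks backwards to the nearest revision that does
    # have sidedata and uses the end of that data (e[8] + e[9]) as the
    # cut-off point.
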
    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.feature_config.has_side_data:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank

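    # Illustrative example: in a linear history 0 <- 1 <- 2, the rank of
    # revision 2 is 3, since ancestors(2) == {0, 1, 2} once the revision
    # itself is included. For formats other than CHANGELOGV2, fast_rank()
    # simply returns None rather than computing that set.
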
    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

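    # Illustrative sketch (assuming a general-delta revlog, where entry[3]
    # records the delta base): given a delta chain 4 -> 2 -> 0, chainbase(4)
    # follows 4 -> 2 -> 0 and returns 0, the first revision whose base is
    # itself, i.e. a full snapshot.
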
    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        if self.feature_config.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if self.feature_config.canonical_parent_order and d[5] == self.nullid:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self.delta_config.general_delta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

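    # Illustrative example: for the same hypothetical chain 4 -> 2 -> 0,
    # _chaininfo(4) returns (2, length(4) + length(2) + length(0)): two
    # deltas on top of the base, plus the compressed length of every link
    # (the base is counted because decompressing it is part of
    # reconstructing rev 4).
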
    def _deltachain(self, rev, stoprev=None):
        return self._inner._deltachain(rev, stoprev=stoprev)

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

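    # Illustrative example on a small hypothetical DAG (0 <- 1 <- 2, plus
    # 0 <- 3), with `rl` a revlog over it:
    #
    #   has, missing = rl.findcommonmissing(
    #       common=[rl.node(1)], heads=[rl.node(2), rl.node(3)]
    #   )
    #   # `has` lazily covers ::1, i.e. {nullrev, 0, 1}
    #   # `missing` is [rl.node(2), rl.node(3)], sorted by revision number
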
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

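    # Illustrative note: ishead in _headrevs() has one extra slot so that
    # writing ishead[nullrev] (i.e. ishead[-1]) lands on that sentinel
    # instead of clobbering the flag of the tip revision. On the DAG
    # 0 <- 1 <- 2 plus 0 <- 3, every rev is first marked as a candidate,
    # then 0 and 1 are cleared as parents, leaving [2, 3].
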
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

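    # Illustrative example: with revisions 0 <- 1 and 0 <- 2, a hypothetical
    # rl.children(rl.node(0)) returns [rl.node(1), rl.node(2)]. The scan is
    # linear in the number of later revisions, because revlogs only store
    # parent pointers, not child pointers.
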
    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

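    # Illustrative note: the `a > b` shortcut in isancestorrev() is sound
    # because revision numbers are topologically sorted, so an ancestor can
    # never have a higher revision number than its descendant. The remaining
    # case asks reachableroots() whether `b` is reachable when `a` is the
    # only permitted root.
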
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass

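    # Illustrative sketch of the _match() lookup order, for a hypothetical
    # revlog `rl` (values chosen only for this example):
    #
    #   rl._match(5)                # int: rev 5 -> node
    #   rl._match(binary_nodeid)    # nodelen bytes: exact binary lookup
    #   rl._match(b'5')             # decimal bytes: rev 5 -> node
    #   rl._match(full_hex_nodeid)  # 2 * nodelen bytes: hex nodeid lookup
    #
    # Anything else falls through to None, leaving prefix resolution to
    # _partialmatch().
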
    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids, as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if id[-1] not in hexdigits:
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

2691 def shortest(self, node, minlength=1):
2695 def shortest(self, node, minlength=1):
2692 """Find the shortest unambiguous prefix that matches node."""
2696 """Find the shortest unambiguous prefix that matches node."""
2693
2697
2694 def isvalid(prefix):
2698 def isvalid(prefix):
2695 try:
2699 try:
2696 matchednode = self._partialmatch(prefix)
2700 matchednode = self._partialmatch(prefix)
2697 except error.AmbiguousPrefixLookupError:
2701 except error.AmbiguousPrefixLookupError:
2698 return False
2702 return False
2699 except error.WdirUnsupported:
2703 except error.WdirUnsupported:
2700 # single 'ff...' match
2704 # single 'ff...' match
2701 return True
2705 return True
2702 if matchednode is None:
2706 if matchednode is None:
2703 raise error.LookupError(node, self.display_id, _(b'no node'))
2707 raise error.LookupError(node, self.display_id, _(b'no node'))
2704 return True
2708 return True
2705
2709
2706 def maybewdir(prefix):
2710 def maybewdir(prefix):
2707 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2711 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2708
2712
2709 hexnode = hex(node)
2713 hexnode = hex(node)
2710
2714
2711 def disambiguate(hexnode, minlength):
2715 def disambiguate(hexnode, minlength):
2712 """Disambiguate against wdirid."""
2716 """Disambiguate against wdirid."""
2713 for length in range(minlength, len(hexnode) + 1):
2717 for length in range(minlength, len(hexnode) + 1):
2714 prefix = hexnode[:length]
2718 prefix = hexnode[:length]
2715 if not maybewdir(prefix):
2719 if not maybewdir(prefix):
2716 return prefix
2720 return prefix
2717
2721
2718 if not getattr(self, 'filteredrevs', None):
2722 if not getattr(self, 'filteredrevs', None):
2719 try:
2723 try:
2720 length = max(self.index.shortest(node), minlength)
2724 length = max(self.index.shortest(node), minlength)
2721 return disambiguate(hexnode, length)
2725 return disambiguate(hexnode, length)
2722 except error.RevlogError:
2726 except error.RevlogError:
2723 if node != self.nodeconstants.wdirid:
2727 if node != self.nodeconstants.wdirid:
2724 raise error.LookupError(
2728 raise error.LookupError(
2725 node, self.display_id, _(b'no node')
2729 node, self.display_id, _(b'no node')
2726 )
2730 )
2727 except AttributeError:
2731 except AttributeError:
2728 # Fall through to pure code
2732 # Fall through to pure code
2729 pass
2733 pass
2730
2734
2731 if node == self.nodeconstants.wdirid:
2735 if node == self.nodeconstants.wdirid:
2732 for length in range(minlength, len(hexnode) + 1):
2736 for length in range(minlength, len(hexnode) + 1):
2733 prefix = hexnode[:length]
2737 prefix = hexnode[:length]
2734 if isvalid(prefix):
2738 if isvalid(prefix):
2735 return prefix
2739 return prefix
2736
2740
2737 for length in range(minlength, len(hexnode) + 1):
2741 for length in range(minlength, len(hexnode) + 1):
2738 prefix = hexnode[:length]
2742 prefix = hexnode[:length]
2739 if isvalid(prefix):
2743 if isvalid(prefix):
2740 return disambiguate(hexnode, length)
2744 return disambiguate(hexnode, length)
2741
2745
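    # Illustrative example (an assumption for clarity, not original code): a
    # prefix made only of 'f' characters could also match the virtual
    # working-directory id (which is all 'f's), so it is never returned. For
    # a node with hex b'ffa1...' that is unique from its first character:
    #
    #   rl.shortest(node)   # -> b'ffa', the first prefix that cannot be
    #                       #    mistaken for the wdir id
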
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node
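
    # Illustrative note: because the stored node is a hash over the parents
    # and the text, comparing hashes detects a content change without reading
    # the stored revision back, e.g.:
    #
    #   if rl.cmp(node, candidate_text):
    #       pass  # candidate differs from what revision `node` stores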

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self.delta_config.general_delta:
            return base
        else:
            return rev - 1
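
    # Illustrative sketch (hypothetical values): with general delta enabled,
    # the base recorded in the index may be any earlier revision; in the
    # legacy layout revision `r` is always deltaed against `r - 1`:
    #
    #   rl.deltaparent(7)   # e.g. 3 with general delta, 6 without,
    #                       # nullrev if revision 7 is a full snapshot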

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        ret = self._inner.issnapshot(rev)
        # after the first call, rebind the attribute to the inner
        # implementation so later calls bypass this wrapper entirely
        self.issnapshot = self._inner.issnapshot
        return ret

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._inner._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._inner._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def revision(self, nodeorrev):
        """return an uncompressed revision of a given node or revision
        number.
        """
        return self._revisiondata(nodeorrev)

    def sidedata(self, nodeorrev):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _rawtext(self, node, rev):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """
        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._inner._revisioncache:
            if self._inner._revisioncache[0] == node:
                return (rev, self._inner._revisioncache[2], True)

        if rev is None:
            rev = self.rev(node)

        return self._inner.raw_text(node, rev)

    def _revisiondata(self, nodeorrev, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev)

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._inner._revisioncache = (node, rev, rawtext)

        return text
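
    # Usage sketch (illustrative): `revision()` returns the fully processed
    # text while `rawdata()` skips the flag processors, so the two can differ
    # for, e.g., censored or externally stored revisions:
    #
    #   text = rl.revision(node)   # flag processors applied, hash checked
    #   raw = rl.rawdata(node)     # bytes as stored in the revlog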

    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        sidedata_end = None
        if self._docket is not None:
            sidedata_end = self._docket.sidedata_end
        return self._inner.sidedata(rev, sidedata_end)

    def rawdata(self, nodeorrev):
        """return the uncompressed raw data of a given node or revision number."""
        return self._revisiondata(nodeorrev, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)
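
    # Rough sketch of the default scheme (simplified; the exact ordering of a
    # null second parent in storageutil.hashrevisionsha1 is glossed over):
    #
    #   import hashlib
    #   def sketch_hash(text, p1, p2):
    #       a, b = min(p1, p2), max(p1, p2)  # normalize parent order
    #       return hashlib.sha1(a + b + text).digest()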

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if (
                    self._inner._revisioncache
                    and self._inner._revisioncache[0] == node
                ):
                    self._inner._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self.feature_config.censorable and storageutil.iscensoredtext(
                text
            ):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    @property
    def _split_index_file(self):
        """the path at which the index of an ongoing splitting operation is expected

        The file will only exist if a splitting operation is in progress, but
        it is always expected at the same location."""
        parts = self.radix.split(b'/')
        if len(parts) > 1:
            # add a '-s' suffix to the `data/` or `meta/` base directory
            head = parts[0] + b'-s'
            mids = parts[1:-1]
            tail = parts[-1] + b'.i'
            pieces = [head] + mids + [tail]
            return b'/'.join(pieces)
        else:
            # the revlog is stored at the root of the store (changelog or
            # manifest), no risk of collision.
            return self.radix + b'.i.s'
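
    # Illustrative example (not from the original source): a filelog with
    # radix b'data/some/file' expects its split index at
    # b'data-s/some/file.i', while a root-level radix such as b'00changelog'
    # maps to b'00changelog.i.s'.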

    def _enforceinlinesize(self, tr, side_write=True):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        if self._docket is not None:
            msg = b"inline revlog should not have a docket"
            raise error.ProgrammingError(msg)

        troffset = tr.findoffset(self._inner.canonical_index_file)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        if troffset:
            tr.addbackup(self._inner.canonical_index_file, for_offset=True)
        tr.add(self._datafile, 0)

        new_index_file_path = None
        if side_write:
            old_index_file_path = self._indexfile
            new_index_file_path = self._split_index_file
            opener = self.opener
            weak_self = weakref.ref(self)

            # the "split" index replaces the real index when the transaction
            # is finalized
            def finalize_callback(tr):
                opener.rename(
                    new_index_file_path,
                    old_index_file_path,
                    checkambig=True,
                )
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path
                    maybe_self._inner.index_file = maybe_self._indexfile

            def abort_callback(tr):
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path
                    maybe_self._inner.inline = True
                    maybe_self._inner.index_file = old_index_file_path

            tr.registertmp(new_index_file_path)
            if self.target[1] is not None:
                callback_id = b'000-revlog-split-%d-%s' % self.target
            else:
                callback_id = b'000-revlog-split-%d' % self.target[0]
            tr.addfinalize(callback_id, finalize_callback)
            tr.addabort(callback_id, abort_callback)

        self._format_flags &= ~FLAG_INLINE_DATA
        self._inner.split_inline(
            tr,
            self._format_flags | self._format_version,
            new_index_file_path=new_index_file_path,
        )

        self._inline = False
        if new_index_file_path is not None:
            self._indexfile = new_index_file_path

        nodemaputil.setup_persistent_nodemap(tr, self)

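    # Note (illustrative): `_maxinline` is the size threshold above which an
    # inline revlog (index and data interleaved in the `.i` file) is split
    # into separate `.i` and `.d` files; the conversion is not reversed.
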
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        with self._inner.reading():
            yield

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._inner.is_writing:
            yield
        else:
            data_end = None
            sidedata_end = None
            if self._docket is not None:
                data_end = self._docket.data_end
                sidedata_end = self._docket.sidedata_end
            with self._inner.writing(
                transaction,
                data_end=data_end,
                sidedata_end=sidedata_end,
            ):
                yield
                if self._docket is not None:
                    self._write_docket(transaction)

    @property
    def is_delaying(self):
        return self._inner.is_delaying

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic.

        We could also imagine using the same transaction logic for all revlogs
        since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2); however, subclasses might
        use a different hashing method (and override checkhash() in such a case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.feature_config.has_side_data:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )
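
    # Usage sketch (illustrative, hypothetical names): adding a revision
    # inside an open transaction and letting the revlog compute the node:
    #
    #   with repo.transaction(b'example') as tr:
    #       rev = rl.addrevision(text, tr, linkrev, p1node, p2node)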

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        return self._inner.compress(data)

    def decompress(self, data):
        return self._inner.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._inner._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._inner._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need the rawtext size before it is changed by flag processors,
            # which is the non-raw size. use revlog explicitly to avoid
            # filelog's extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self.delta_config.debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        if cachedelta is not None and len(cachedelta) == 2:
            # If the cached delta has no information about how it should be
            # reused, add the default reuse instruction according to the
            # revlog's configuration.
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse = DELTA_BASE_REUSE_NO
            cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

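        # For illustration (not original code): after this normalization a
        # cached delta is always a 3-tuple, e.g.
        # (base_rev, delta_bytes, DELTA_BASE_REUSE_TRY), whose last element
        # tells the delta computer whether the advertised base may be kept.
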
        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.feature_config.has_side_data:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self._inner.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self.feature_config.compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

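        # Illustrative note: `rank` is the size of a revision's ancestor set,
        # the revision itself included; a root has rank 1 and a linear
        # descendant with n ancestors has rank n + 1.
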
        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._inner._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), while revlog v2 needs
        a docket file to store that information: since sidedata can be
        rewritten to the end of the data file within a transaction, you can
        have cases where, for example, rev `n` does not have sidedata while
        rev `n - 1` does, leading to `n - 1`'s sidedata being written after
        `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self,
        transaction,
        entry,
        data,
        link,
        offset,
        sidedata,
        sidedata_offset,
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        index_end = data_end = sidedata_end = None
        if self._docket is not None:
            index_end = self._docket.index_end
            data_end = self._docket.data_end
            sidedata_end = self._docket.sidedata_end

        files_end = self._inner.write_entry(
            transaction,
            entry,
            data,
            link,
            offset,
            sidedata,
            sidedata_offset,
            index_end,
            data_end,
            sidedata_end,
        )
        self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = files_end[0]
            self._docket.data_end = files_end[1]
            self._docket.sidedata_end = files_end[2]

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self.delta_config.debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty
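
    # Shape of the incoming deltas (illustrative): each item consumed by
    # addgroup() above is an 8-tuple:
    #
    #   (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    #
    # where `deltabase` names the revision the `delta` bytes apply against
    # and `linknode` is mapped to a local linkrev through `linkmapper`.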

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self.feature_config.censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self.feature_config.censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._chaininfocache = util.lrucachedict(500)
        self._inner.clear_cache()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)
3680
3684
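    # Editorial usage sketch for `checksize` (hypothetical caller `rl`, not
    # part of the original module):
    #
    #     dd, di = rl.checksize()
    #     if dd or di:
    #         # positive values: trailing bytes beyond what the index
    #         # describes; negative values: a file shorter than expected
    #         print(b'revlog damaged (dd=%d, di=%d)' % (dd, di))
    #
    # `verifyintegrity` below reports these same two values as problems.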
    def files(self):
        """return list of files that compose this revlog"""
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self.delta_config.general_delta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self._candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

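    # Editorial sketch of consuming `emitrevisions` (assumes the emitted
    # objects follow the `irevisiondelta` interface from
    # `interfaces.repository`; not part of the original module):
    #
    #     for rd in rl.emitrevisions(nodes, revisiondata=True):
    #         if rd.delta is not None:
    #             apply_delta(rd.basenode, rd.delta)    # hypothetical helper
    #         else:
    #             store_fulltext(rd.node, rd.revision)  # hypothetical helper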
    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta
        encoding differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy
        can significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means
        if you are converting a non-generaldelta revlog to a generaldelta
        revlog, deltas will be recomputed if the delta's parent isn't a
        parent of the revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both
        parents for merges. If ``None``, the destination revlog's existing
        setting is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        old_delta_config = destrevlog.delta_config
        destrevlog.delta_config = destrevlog.delta_config.copy()

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog.delta_config.lazy_delta_base = True
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = False

            delta_both_parents = (
                forcedeltabothparents or old_delta_config.delta_both_parents
            )
            destrevlog.delta_config.delta_both_parents = delta_both_parents

            with self.reading(), destrevlog._writing(tr):
                self._clone(
                    tr,
                    destrevlog,
                    addrevisioncb,
                    deltareuse,
                    forcedeltabothparents,
                    sidedata_helpers,
                )

        finally:
            destrevlog.delta_config = old_delta_config

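    # Editorial usage sketch for `clone` (hypothetical `tr`, `src` and empty
    # `dest`; not part of the original module). Per the docstring above,
    # DELTAREUSENEVER is the policy to pick when every delta should be
    # recomputed, e.g. after a delta algorithm change:
    #
    #     src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)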
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self.delta_config.debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog.delta_config.lazy_delta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._inner._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    try:
                        rawtext = self._revisiondata(rev)
                    except error.CensoredNodeError as censored:
                        assert flags & REVIDX_ISCENSORED
                        rawtext = censored.tombstone
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

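    # Editorial sketch of driving `verifyintegrity` (assumes a `state`
    # mapping prepared by the verifier, as in `mercurial/verify.py`; not
    # part of the original module):
    #
    #     for problem in rl.verifyintegrity(state):
    #         if problem.error is not None:
    #             report_error(problem.error, problem.node)   # hypothetical
    #         elif problem.warning is not None:
    #             report_warning(problem.warning)             # hypothetical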
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

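    # Editorial usage sketch for `storageinfo`; only the requested keys end
    # up in the returned dict (not part of the original module):
    #
    #     info = rl.storageinfo(revisionscount=True, storedsize=True)
    #     info[b'revisionscount']  # number of revisions in this revlog
    #     info[b'storedsize']      # on-disk bytes summed over rl.files()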
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.feature_config.has_side_data:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._inner._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.feature_config.has_side_data:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self._inner.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
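                # Editorial note on the branch above (comment added for
                # clarity): COMP_MODE_PLAIN stores the serialized sidedata
                # uncompressed, COMP_MODE_DEFAULT stores it compressed with
                # the docket's default engine (header byte implied), and
                # COMP_MODE_INLINE keeps the compression header inside the
                # chunk itself.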
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)