revlog: move the `deltachain` method on the inner object...
marmoute
r51988:30f458fc default
@@ -1,3969 +1,3971 b''
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import io
import os
import struct
import weakref
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if hasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'


class _Config:
    def copy(self):
        return self.__class__(**self.__dict__)


@attr.s()
class FeatureConfig(_Config):
    """Hold configuration values about the available revlog features"""

    # the default compression engine
    compression_engine = attr.ib(default=b'zlib')
    # compression engines options
    compression_engine_options = attr.ib(default=attr.Factory(dict))

    # can we use censor on this revlog
    censorable = attr.ib(default=False)
    # does this revlog use the "side data" feature
    has_side_data = attr.ib(default=False)
    # might remove rank configuration once the computation has no impact
    compute_rank = attr.ib(default=False)
    # parent order is supposed to be semantically irrelevant, so we
    # normally resort parents to ensure that the first parent is non-null,
    # if there is a non-null parent at all.
    # filelog abuses the parent order as a flag to mark some instances of
    # meta-encoded files, so allow it to disable this behavior.
    canonical_parent_order = attr.ib(default=False)
    # can ellipsis commit be used
    enable_ellipsis = attr.ib(default=False)

    def copy(self):
        new = super().copy()
        new.compression_engine_options = self.compression_engine_options.copy()
        return new


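# A minimal usage sketch (hypothetical values): `_Config.copy()` rebuilds the
# object from its attributes, and `FeatureConfig.copy()` additionally
# duplicates the engine-options dict, so mutating a copy leaves the original
# untouched:
#
#   base = FeatureConfig(compression_engine=b'zstd')
#   clone = base.copy()
#   clone.compression_engine_options[b'level'] = 5
#   assert b'level' not in base.compression_engine_options

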
@attr.s()
class DataConfig(_Config):
    """Hold configuration values about how the revlog data are read"""

    # should we try to open the "pending" version of the revlog
    try_pending = attr.ib(default=False)
    # should we try to open the "splitted" version of the revlog
    try_split = attr.ib(default=False)
    # When True, indexfile should be opened with checkambig=True at writing,
    # to avoid file stat ambiguity.
    check_ambig = attr.ib(default=False)

    # If true, use mmap instead of reading to deal with large index
    mmap_large_index = attr.ib(default=False)
    # how much data is large
    mmap_index_threshold = attr.ib(default=None)
    # How much data to read and cache into the raw revlog data cache.
    chunk_cache_size = attr.ib(default=65536)

    # Allow sparse reading of the revlog data
    with_sparse_read = attr.ib(default=False)
    # minimal density of a sparse read chunk
    sr_density_threshold = attr.ib(default=0.50)
    # minimal size of data we skip when performing sparse read
    sr_min_gap_size = attr.ib(default=262144)

    # are deltas encoded against arbitrary bases.
    generaldelta = attr.ib(default=False)


@attr.s()
class DeltaConfig(_Config):
    """Hold configuration values about how new deltas are computed

    Some attributes are duplicated from DataConfig to help having each
    object self-contained.
    """

    # can deltas be encoded against arbitrary bases.
    general_delta = attr.ib(default=False)
    # Allow sparse writing of the revlog data
    sparse_revlog = attr.ib(default=False)
    # maximum length of a delta chain
    max_chain_len = attr.ib(default=None)
    # Maximum distance between delta chain base start and end
    max_deltachain_span = attr.ib(default=-1)
    # If `upper_bound_comp` is not None, this is the expected maximal gain from
    # compression for the data content.
    upper_bound_comp = attr.ib(default=None)
    # Should we try a delta against both parents
    delta_both_parents = attr.ib(default=True)
    # Test delta base candidate groups by chunks of this maximal size.
    candidate_group_chunk_size = attr.ib(default=0)
    # Should we display debug information about delta computation
    debug_delta = attr.ib(default=False)
    # trust incoming deltas by default
    lazy_delta = attr.ib(default=True)
    # trust the base of incoming deltas by default
    lazy_delta_base = attr.ib(default=False)


class _InnerRevlog:
    """An inner layer of the revlog object

    That layer exists to be able to delegate some operations to Rust; its
    boundaries are arbitrary and based on what we can delegate to Rust.
    """

    def __init__(
        self,
        opener,
        index,
        index_file,
        data_file,
        sidedata_file,
        inline,
        data_config,
        delta_config,
        feature_config,
        chunk_cache,
        default_compression_header,
    ):
        self.opener = opener
        self.index = index

        self.__index_file = index_file
        self.data_file = data_file
        self.sidedata_file = sidedata_file
        self.inline = inline
        self.data_config = data_config
        self.delta_config = delta_config
        self.feature_config = feature_config

        self._default_compression_header = default_compression_header

        # index

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self.index_file if self.inline else self.data_file),
            self.data_config.chunk_cache_size,
            chunk_cache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self.sidedata_file,
            self.data_config.chunk_cache_size,
        )

        # revlog header -> revlog compressor
        self._decompressors = {}

    @property
    def index_file(self):
        return self.__index_file

    @index_file.setter
    def index_file(self, new_index_file):
        self.__index_file = new_index_file
        if self.inline:
            self._segmentfile.filename = new_index_file

    def __len__(self):
        return len(self.index)

    # Derived from index values.

    def start(self, rev):
        """the offset of the data chunk for this revision"""
        return int(self.index[rev][0] >> 16)

    def length(self, rev):
        """the length of the data chunk for this revision"""
        return self.index[rev][1]

    def end(self, rev):
        """the end of the data chunk for this revision"""
        return self.start(rev) + self.length(rev)

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self.delta_config.general_delta:
            return base
        else:
            return rev - 1

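    # Illustration with hypothetical index contents: with general delta
    # enabled, the delta parent is whatever base the index records, e.g.
    # index[5][3] == 2 means rev 5 deltas against rev 2. Without general
    # delta, chains are strictly sequential, so the delta parent of rev 5
    # is rev 4; a revision recorded as its own base is a full snapshot and
    # reports nullrev.
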
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self.delta_config.sparse_revlog:
            return self.deltaparent(rev) == nullrev
        elif hasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        while self.length(p1) == 0:
            b = self.deltaparent(p1)
            if b == p1:
                break
            p1 = b
        p2 = entry[6]
        while self.length(p2) == 0:
            b = self.deltaparent(p2)
            if b == p2:
                break
            p2 = b
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        generaldelta = self.delta_config.general_delta
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

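    # Worked example with hypothetical revision numbers: if rev 3 deltas
    # against rev 2 and rev 2 against a full snapshot at rev 0, then
    # `_deltachain(3)` returns ([0, 2, 3], False), while
    # `_deltachain(3, stoprev=2)` stops before including rev 2 and returns
    # ([3], True).
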
    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self.feature_config.compression_engine]
        return engine.revlogcompressor(
            self.feature_config.compression_engine_options
        )

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._default_compression_header is None:
            return None
        t = self._default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(
                    self.feature_config.compression_engine_options
                )
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

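    # Sketch of the resulting header convention (values are illustrative):
    # the 2-tuple is (header, data). A compressed payload carries its own
    # engine header, e.g. zlib output starts with 'x', so the revlog-level
    # header stays empty; b'u' marks text stored uncompressed, and data
    # beginning with a NUL byte is also stored as-is with an empty header:
    #
    #   compress(b'') == (b'', b'')             # empty input, stored as-is
    #   compress(big_text) == (b'', b'x...')    # default zlib compression
    #   compress(b'abc') == (b'u', b'abc')      # incompressible, kept raw
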
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        if len(self.index) == 0:
            yield  # nothing to be read
        else:
            with self._segmentfile.reading():
                with self._segmentfile_sidedata.reading():
                    yield

    @property
    def is_writing(self):
        """True if a writing context is open"""
        return self._writinghandles is not None

    @contextlib.contextmanager
    def writing(self, transaction, data_end=None, sidedata_end=None):
        """Open the revlog files for writing

        Adding content to a revlog should be done within such a context.
        """
        if self.is_writing:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self.index)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self.inline:
                    try:
                        dfh = self.opener(self.data_file, mode=b"r+")
                        if data_end is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self.opener(self.data_file, mode=b"w+")
                    transaction.add(self.data_file, dsize)
                if self.sidedata_file is not None:
                    assert sidedata_end is not None
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self.sidedata_file, mode=b"r+")
                        dfh.seek(sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self.sidedata_file, mode=b"w+")
                    transaction.add(self.sidedata_file, sidedata_end)

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self.inline:
                    transaction.add(self.index_file, dsize + isize)
                else:
                    transaction.add(self.index_file, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self.inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing referent to
                # potential unflushed data content.
                if ifh is not None:
                    ifh.close()

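    # Typical call shape (hypothetical names): appends are performed inside
    # the context so the index/data/sidedata handles stay open and the
    # files are registered with the transaction:
    #
    #   with inner.writing(tr, data_end=end, sidedata_end=sd_end):
    #       ...  # append revisions here
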
    def __index_write_fp(self, index_end=None):
        """internal method to open the index file for writing

        You should not use this directly; use `_writing` instead
        """
        try:
            f = self.opener(
                self.index_file,
                mode=b"r+",
                checkambig=self.data_config.check_ambig,
            )
            if index_end is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(index_end, os.SEEK_SET)
            return f
        except FileNotFoundError:
            return self.opener(
                self.index_file,
                mode=b"w+",
                checkambig=self.data_config.check_ambig,
            )

    def __index_new_fp(self):
        """internal method to create a new index file for writing

        You should not use this unless you are upgrading from inline revlog
        """
        return self.opener(
            self.index_file,
            mode=b"w",
            checkambig=self.data_config.check_ambig,
            atomictemp=True,
        )

    def split_inline(self, tr, header, new_index_file_path=None):
        """split the data of an inline revlog into an index and a data file"""
        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with sidedata writing handle as it is only
            # relevant with revlog-v2 which is never inline, not reaching
            # this code

        new_dfh = self.opener(self.data_file, mode=b"w+")
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self.reading():
                for r in range(len(self.index)):
                    new_dfh.write(self.get_segment_for_revs(r, r)[1])
                new_dfh.flush()

            if new_index_file_path is not None:
                self.index_file = new_index_file_path
            with self.__index_new_fp() as fp:
                self.inline = False
                for i in range(len(self.index)):
                    e = self.index.entry_binary(i)
                    if i == 0:
                        packed_header = self.index.pack_header(header)
                        e = packed_header + e
                    fp.write(e)

                # If we don't use side-write, the temp file replaces the real
                # index when we exit the context manager

            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self.data_file,
                self.data_config.chunk_cache_size,
            )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with sidedata writing handle as it is only
                # relevant with revlog-v2 which is never inline, not reaching
                # this code
        finally:
            if new_dfh is not None:
                new_dfh.close()
        return self.index_file

    def get_segment_for_revs(self, startrev, endrev):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.

        API: we should consider making this a private part of the InnerRevlog
        at some point.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self.inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length)

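    # Offset arithmetic sketch (hypothetical sizes): in an inline revlog
    # the data chunks are interleaved with index entries, so with a
    # 64-byte entry size a chunk whose index offset for rev 2 is 100
    # actually starts 100 + (2 + 1) * 64 = 292 bytes into the index file.
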
    def _chunk(self, rev):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self.get_segment_for_revs(rev, rev)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self.inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self.data_config.with_sparse_read:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self,
                revs,
                targetsize=targetsize,
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self.get_segment_for_revs(firstrev, lastrev)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l
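
    # e.g., with hypothetical revisions: `_chunks([0, 1, 2])` reads one
    # contiguous segment covering revs 0..2 and slices the per-revision
    # buffers out of it, which is why it is faster than three separate
    # `_chunk()` calls.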


class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

    _flagserrorclass = error.RevlogError

    @staticmethod
    def is_inline_index(header_bytes):
        """Determine if a revlog is inline from the initial bytes of the index"""
        header = INDEX_HEADER.unpack(header_bytes)[0]

        _format_flags = header & ~0xFFFF
        _format_version = header & 0xFFFF

        features = FEATURES_BY_VERSION[_format_version]
        return features[b'inline'](_format_flags)

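    # Bit-layout sketch, assuming the v1 constants (FLAG_INLINE_DATA being
    # 1 << 16): a header of 0x00010001 decodes to format version 1
    # (header & 0xFFFF) with the inline flag set in the upper bits
    # (header & ~0xFFFF), so this method returns True for such an index.
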
920 def __init__(
961 def __init__(
921 self,
962 self,
922 opener,
963 opener,
923 target,
964 target,
924 radix,
965 radix,
925 postfix=None, # only exist for `tmpcensored` now
966 postfix=None, # only exist for `tmpcensored` now
926 checkambig=False,
967 checkambig=False,
927 mmaplargeindex=False,
968 mmaplargeindex=False,
928 censorable=False,
969 censorable=False,
929 upperboundcomp=None,
970 upperboundcomp=None,
930 persistentnodemap=False,
971 persistentnodemap=False,
931 concurrencychecker=None,
972 concurrencychecker=None,
932 trypending=False,
973 trypending=False,
933 try_split=False,
974 try_split=False,
934 canonical_parent_order=True,
975 canonical_parent_order=True,
935 ):
976 ):
936 """
977 """
937 create a revlog object
978 create a revlog object
938
979
939 opener is a function that abstracts the file opening operation
980 opener is a function that abstracts the file opening operation
940 and can be used to implement COW semantics or the like.
981 and can be used to implement COW semantics or the like.
941
982
942 `target`: a (KIND, ID) tuple that identifies the content stored in
983 `target`: a (KIND, ID) tuple that identifies the content stored in
943 this revlog. It helps the rest of the code understand what the revlog
984 this revlog. It helps the rest of the code understand what the revlog
944 is about without having to resort to heuristics and index filename
985 is about without having to resort to heuristics and index filename
945 analysis. Note that this must be reliably set by normal code, but
986 analysis. Note that this must be reliably set by normal code, but
946 that test, debug, or performance measurement code might not set it to
987 that test, debug, or performance measurement code might not set it to
947 an accurate value.
988 an accurate value.
948 """
989 """
949
990
950 self.radix = radix
991 self.radix = radix
951
992
952 self._docket_file = None
993 self._docket_file = None
953 self._indexfile = None
994 self._indexfile = None
954 self._datafile = None
995 self._datafile = None
955 self._sidedatafile = None
996 self._sidedatafile = None
956 self._nodemap_file = None
997 self._nodemap_file = None
957 self.postfix = postfix
998 self.postfix = postfix
958 self._trypending = trypending
999 self._trypending = trypending
959 self._try_split = try_split
1000 self._try_split = try_split
960 self.opener = opener
1001 self.opener = opener
961 if persistentnodemap:
1002 if persistentnodemap:
962 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1003 self._nodemap_file = nodemaputil.get_nodemap_file(self)
963
1004
964 assert target[0] in ALL_KINDS
1005 assert target[0] in ALL_KINDS
965 assert len(target) == 2
1006 assert len(target) == 2
966 self.target = target
1007 self.target = target
967 if b'feature-config' in self.opener.options:
1008 if b'feature-config' in self.opener.options:
968 self.feature_config = self.opener.options[b'feature-config'].copy()
1009 self.feature_config = self.opener.options[b'feature-config'].copy()
969 else:
1010 else:
970 self.feature_config = FeatureConfig()
1011 self.feature_config = FeatureConfig()
971 self.feature_config.censorable = censorable
1012 self.feature_config.censorable = censorable
972 self.feature_config.canonical_parent_order = canonical_parent_order
1013 self.feature_config.canonical_parent_order = canonical_parent_order
973 if b'data-config' in self.opener.options:
1014 if b'data-config' in self.opener.options:
974 self.data_config = self.opener.options[b'data-config'].copy()
1015 self.data_config = self.opener.options[b'data-config'].copy()
975 else:
1016 else:
976 self.data_config = DataConfig()
1017 self.data_config = DataConfig()
977 self.data_config.check_ambig = checkambig
1018 self.data_config.check_ambig = checkambig
978 self.data_config.mmap_large_index = mmaplargeindex
1019 self.data_config.mmap_large_index = mmaplargeindex
979 if b'delta-config' in self.opener.options:
1020 if b'delta-config' in self.opener.options:
980 self.delta_config = self.opener.options[b'delta-config'].copy()
1021 self.delta_config = self.opener.options[b'delta-config'].copy()
981 else:
1022 else:
982 self.delta_config = DeltaConfig()
1023 self.delta_config = DeltaConfig()
983 self.delta_config.upper_bound_comp = upperboundcomp
1024 self.delta_config.upper_bound_comp = upperboundcomp
984
1025
985 # 3-tuple of (node, rev, text) for a raw revision.
1026 # 3-tuple of (node, rev, text) for a raw revision.
986 self._revisioncache = None
1027 self._revisioncache = None
987 # Maps rev to chain base rev.
1028 # Maps rev to chain base rev.
988 self._chainbasecache = util.lrucachedict(100)
1029 self._chainbasecache = util.lrucachedict(100)
989
1030
990 self.index = None
1031 self.index = None
991 self._docket = None
1032 self._docket = None
992 self._nodemap_docket = None
1033 self._nodemap_docket = None
993 # Mapping of partial identifiers to full nodes.
1034 # Mapping of partial identifiers to full nodes.
994 self._pcache = {}
1035 self._pcache = {}
995
1036
996 # other optional features
1037 # other optional features
997
1038
998 # Make copy of flag processors so each revlog instance can support
1039 # Make copy of flag processors so each revlog instance can support
999 # custom flags.
1040 # custom flags.
1000 self._flagprocessors = dict(flagutil.flagprocessors)
1041 self._flagprocessors = dict(flagutil.flagprocessors)
1001 # prevent nesting of addgroup
1042 # prevent nesting of addgroup
1002 self._adding_group = None
1043 self._adding_group = None
1003
1044
1004 chunk_cache = self._loadindex()
1045 chunk_cache = self._loadindex()
1005 self._load_inner(chunk_cache)
1046 self._load_inner(chunk_cache)
1006
1007 self._concurrencychecker = concurrencychecker
1047 self._concurrencychecker = concurrencychecker
1008
1048
1009 @property
1049 @property
1010 def _generaldelta(self):
1050 def _generaldelta(self):
1011 """temporary compatibility proxy"""
1051 """temporary compatibility proxy"""
1012 util.nouideprecwarn(
1052 util.nouideprecwarn(
1013 b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
1053 b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
1014 )
1054 )
1015 return self.delta_config.general_delta
1055 return self.delta_config.general_delta
1016
1056
1017 @property
1057 @property
1018 def _checkambig(self):
1058 def _checkambig(self):
1019 """temporary compatibility proxy"""
1059 """temporary compatibility proxy"""
1020 util.nouideprecwarn(
1060 util.nouideprecwarn(
1021 b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
1061 b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
1022 )
1062 )
1023 return self.data_config.check_ambig
1063 return self.data_config.check_ambig
1024
1064
1025 @property
1065 @property
1026 def _mmaplargeindex(self):
1066 def _mmaplargeindex(self):
1027 """temporary compatibility proxy"""
1067 """temporary compatibility proxy"""
1028 util.nouideprecwarn(
1068 util.nouideprecwarn(
1029 b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
1069 b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
1030 )
1070 )
1031 return self.data_config.mmap_large_index
1071 return self.data_config.mmap_large_index
1032
1072
1033 @property
1073 @property
1034 def _censorable(self):
1074 def _censorable(self):
1035 """temporary compatibility proxy"""
1075 """temporary compatibility proxy"""
1036 util.nouideprecwarn(
1076 util.nouideprecwarn(
1037 b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
1077 b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
1038 )
1078 )
1039 return self.feature_config.censorable
1079 return self.feature_config.censorable
1040
1080
1041 @property
1081 @property
1042 def _chunkcachesize(self):
1082 def _chunkcachesize(self):
1043 """temporary compatibility proxy"""
1083 """temporary compatibility proxy"""
1044 util.nouideprecwarn(
1084 util.nouideprecwarn(
1045 b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
1085 b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
1046 )
1086 )
1047 return self.data_config.chunk_cache_size
1087 return self.data_config.chunk_cache_size
1048
1088
1049 @property
1089 @property
1050 def _maxchainlen(self):
1090 def _maxchainlen(self):
1051 """temporary compatibility proxy"""
1091 """temporary compatibility proxy"""
1052 util.nouideprecwarn(
1092 util.nouideprecwarn(
1053 b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
1093 b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
1054 )
1094 )
1055 return self.delta_config.max_chain_len
1095 return self.delta_config.max_chain_len
1056
1096
1057 @property
1097 @property
1058 def _deltabothparents(self):
1098 def _deltabothparents(self):
1059 """temporary compatibility proxy"""
1099 """temporary compatibility proxy"""
1060 util.nouideprecwarn(
1100 util.nouideprecwarn(
1061 b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
1101 b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
1062 )
1102 )
1063 return self.delta_config.delta_both_parents
1103 return self.delta_config.delta_both_parents
1064
1104
1065 @property
1105 @property
1066 def _candidate_group_chunk_size(self):
1106 def _candidate_group_chunk_size(self):
1067 """temporary compatibility proxy"""
1107 """temporary compatibility proxy"""
1068 util.nouideprecwarn(
1108 util.nouideprecwarn(
1069 b"use revlog.delta_config.candidate_group_chunk_size",
1109 b"use revlog.delta_config.candidate_group_chunk_size",
1070 b"6.6",
1110 b"6.6",
1071 stacklevel=2,
1111 stacklevel=2,
1072 )
1112 )
1073 return self.delta_config.candidate_group_chunk_size
1113 return self.delta_config.candidate_group_chunk_size
1074
1114
1075 @property
1115 @property
1076 def _debug_delta(self):
1116 def _debug_delta(self):
1077 """temporary compatibility proxy"""
1117 """temporary compatibility proxy"""
1078 util.nouideprecwarn(
1118 util.nouideprecwarn(
1079 b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
1119 b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
1080 )
1120 )
1081 return self.delta_config.debug_delta
1121 return self.delta_config.debug_delta
1082
1122
1083 @property
1123 @property
1084 def _compengine(self):
1124 def _compengine(self):
1085 """temporary compatibility proxy"""
1125 """temporary compatibility proxy"""
1086 util.nouideprecwarn(
1126 util.nouideprecwarn(
1087 b"use revlog.feature_config.compression_engine",
1127 b"use revlog.feature_config.compression_engine",
1088 b"6.6",
1128 b"6.6",
1089 stacklevel=2,
1129 stacklevel=2,
1090 )
1130 )
1091 return self.feature_config.compression_engine
1131 return self.feature_config.compression_engine
1092
1132
1093 @property
1133 @property
1094 def upperboundcomp(self):
1134 def upperboundcomp(self):
1095 """temporary compatibility proxy"""
1135 """temporary compatibility proxy"""
1096 util.nouideprecwarn(
1136 util.nouideprecwarn(
1097 b"use revlog.delta_config.upper_bound_comp",
1137 b"use revlog.delta_config.upper_bound_comp",
1098 b"6.6",
1138 b"6.6",
1099 stacklevel=2,
1139 stacklevel=2,
1100 )
1140 )
1101 return self.delta_config.upper_bound_comp
1141 return self.delta_config.upper_bound_comp
1102
1142
1103 @property
1143 @property
1104 def _compengineopts(self):
1144 def _compengineopts(self):
1105 """temporary compatibility proxy"""
1145 """temporary compatibility proxy"""
1106 util.nouideprecwarn(
1146 util.nouideprecwarn(
1107 b"use revlog.feature_config.compression_engine_options",
1147 b"use revlog.feature_config.compression_engine_options",
1108 b"6.6",
1148 b"6.6",
1109 stacklevel=2,
1149 stacklevel=2,
1110 )
1150 )
1111 return self.feature_config.compression_engine_options
1151 return self.feature_config.compression_engine_options
1112
1152
1113 @property
1153 @property
1114 def _maxdeltachainspan(self):
1154 def _maxdeltachainspan(self):
1115 """temporary compatibility proxy"""
1155 """temporary compatibility proxy"""
1116 util.nouideprecwarn(
1156 util.nouideprecwarn(
1117 b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
1157 b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
1118 )
1158 )
1119 return self.delta_config.max_deltachain_span
1159 return self.delta_config.max_deltachain_span
1120
1160
1121 @property
1161 @property
1122 def _withsparseread(self):
1162 def _withsparseread(self):
1123 """temporary compatibility proxy"""
1163 """temporary compatibility proxy"""
1124 util.nouideprecwarn(
1164 util.nouideprecwarn(
1125 b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
1165 b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
1126 )
1166 )
1127 return self.data_config.with_sparse_read
1167 return self.data_config.with_sparse_read
1128
1168
1129 @property
1169 @property
1130 def _sparserevlog(self):
1170 def _sparserevlog(self):
1131 """temporary compatibility proxy"""
1171 """temporary compatibility proxy"""
1132 util.nouideprecwarn(
1172 util.nouideprecwarn(
1133 b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
1173 b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
1134 )
1174 )
1135 return self.delta_config.sparse_revlog
1175 return self.delta_config.sparse_revlog
1136
1176
1137 @property
1177 @property
1138 def hassidedata(self):
1178 def hassidedata(self):
1139 """temporary compatibility proxy"""
1179 """temporary compatibility proxy"""
1140 util.nouideprecwarn(
1180 util.nouideprecwarn(
1141 b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
1181 b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
1142 )
1182 )
1143 return self.feature_config.has_side_data
1183 return self.feature_config.has_side_data
1144
1184
1145 @property
1185 @property
1146 def _srdensitythreshold(self):
1186 def _srdensitythreshold(self):
1147 """temporary compatibility proxy"""
1187 """temporary compatibility proxy"""
1148 util.nouideprecwarn(
1188 util.nouideprecwarn(
1149 b"use revlog.data_config.sr_density_threshold",
1189 b"use revlog.data_config.sr_density_threshold",
1150 b"6.6",
1190 b"6.6",
1151 stacklevel=2,
1191 stacklevel=2,
1152 )
1192 )
1153 return self.data_config.sr_density_threshold
1193 return self.data_config.sr_density_threshold
1154
1194
1155 @property
1195 @property
1156 def _srmingapsize(self):
1196 def _srmingapsize(self):
1157 """temporary compatibility proxy"""
1197 """temporary compatibility proxy"""
1158 util.nouideprecwarn(
1198 util.nouideprecwarn(
1159 b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
1199 b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
1160 )
1200 )
1161 return self.data_config.sr_min_gap_size
1201 return self.data_config.sr_min_gap_size
1162
1202
1163 @property
1203 @property
1164 def _compute_rank(self):
1204 def _compute_rank(self):
1165 """temporary compatibility proxy"""
1205 """temporary compatibility proxy"""
1166 util.nouideprecwarn(
1206 util.nouideprecwarn(
1167 b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
1207 b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
1168 )
1208 )
1169 return self.feature_config.compute_rank
1209 return self.feature_config.compute_rank
1170
1210
1171 @property
1211 @property
1172 def canonical_parent_order(self):
1212 def canonical_parent_order(self):
1173 """temporary compatibility proxy"""
1213 """temporary compatibility proxy"""
1174 util.nouideprecwarn(
1214 util.nouideprecwarn(
1175 b"use revlog.feature_config.canonical_parent_order",
1215 b"use revlog.feature_config.canonical_parent_order",
1176 b"6.6",
1216 b"6.6",
1177 stacklevel=2,
1217 stacklevel=2,
1178 )
1218 )
1179 return self.feature_config.canonical_parent_order
1219 return self.feature_config.canonical_parent_order
1180
1220
1181 @property
1221 @property
1182 def _lazydelta(self):
1222 def _lazydelta(self):
1183 """temporary compatibility proxy"""
1223 """temporary compatibility proxy"""
1184 util.nouideprecwarn(
1224 util.nouideprecwarn(
1185 b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
1225 b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
1186 )
1226 )
1187 return self.delta_config.lazy_delta
1227 return self.delta_config.lazy_delta
1188
1228
1189 @property
1229 @property
1190 def _lazydeltabase(self):
1230 def _lazydeltabase(self):
1191 """temporary compatibility proxy"""
1231 """temporary compatibility proxy"""
1192 util.nouideprecwarn(
1232 util.nouideprecwarn(
1193 b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
1233 b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
1194 )
1234 )
1195 return self.delta_config.lazy_delta_base
1235 return self.delta_config.lazy_delta_base
1196
1236
1197 def _init_opts(self):
1237 def _init_opts(self):
1198 """process options (from above/config) to setup associated default revlog mode
1238 """process options (from above/config) to setup associated default revlog mode
1199
1239
1200 These values might be affected when actually reading on-disk information.
1240 These values might be affected when actually reading on-disk information.
1201
1241
1202 The relevant values are returned for use in _loadindex().
1242 The relevant values are returned for use in _loadindex().
1203
1243
1204 * newversionflags:
1244 * newversionflags:
1205 version header to use if we need to create a new revlog
1245 version header to use if we need to create a new revlog
1206
1246
1207 * mmapindexthreshold:
1247 * mmapindexthreshold:
1208 minimal index size at which to start using mmap
1248 minimal index size at which to start using mmap
1209
1249
1210 * force_nodemap:
1250 * force_nodemap:
1211 force the usage of a "development" version of the nodemap code
1251 force the usage of a "development" version of the nodemap code
1212 """
1252 """
1213 opts = self.opener.options
1253 opts = self.opener.options
1214
1254
1215 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1255 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1216 new_header = CHANGELOGV2
1256 new_header = CHANGELOGV2
1217 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1257 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1218 self.feature_config.compute_rank = compute_rank
1258 self.feature_config.compute_rank = compute_rank
1219 elif b'revlogv2' in opts:
1259 elif b'revlogv2' in opts:
1220 new_header = REVLOGV2
1260 new_header = REVLOGV2
1221 elif b'revlogv1' in opts:
1261 elif b'revlogv1' in opts:
1222 new_header = REVLOGV1 | FLAG_INLINE_DATA
1262 new_header = REVLOGV1 | FLAG_INLINE_DATA
1223 if b'generaldelta' in opts:
1263 if b'generaldelta' in opts:
1224 new_header |= FLAG_GENERALDELTA
1264 new_header |= FLAG_GENERALDELTA
1225 elif b'revlogv0' in self.opener.options:
1265 elif b'revlogv0' in self.opener.options:
1226 new_header = REVLOGV0
1266 new_header = REVLOGV0
1227 else:
1267 else:
1228 new_header = REVLOG_DEFAULT_VERSION
1268 new_header = REVLOG_DEFAULT_VERSION
1229
1269
1230 mmapindexthreshold = None
1270 mmapindexthreshold = None
1231 if self.data_config.mmap_large_index:
1271 if self.data_config.mmap_large_index:
1232 mmapindexthreshold = self.data_config.mmap_index_threshold
1272 mmapindexthreshold = self.data_config.mmap_index_threshold
1233 if self.feature_config.enable_ellipsis:
1273 if self.feature_config.enable_ellipsis:
1234 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1274 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1235
1275
1236 # revlog v0 doesn't have flag processors
1276 # revlog v0 doesn't have flag processors
1237 for flag, processor in opts.get(b'flagprocessors', {}).items():
1277 for flag, processor in opts.get(b'flagprocessors', {}).items():
1238 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1278 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1239
1279
1240 chunk_cache_size = self.data_config.chunk_cache_size
1280 chunk_cache_size = self.data_config.chunk_cache_size
1241 if chunk_cache_size <= 0:
1281 if chunk_cache_size <= 0:
1242 raise error.RevlogError(
1282 raise error.RevlogError(
1243 _(b'revlog chunk cache size %r is not greater than 0')
1283 _(b'revlog chunk cache size %r is not greater than 0')
1244 % chunk_cache_size
1284 % chunk_cache_size
1245 )
1285 )
1246 elif chunk_cache_size & (chunk_cache_size - 1):
1286 elif chunk_cache_size & (chunk_cache_size - 1):
1247 raise error.RevlogError(
1287 raise error.RevlogError(
1248 _(b'revlog chunk cache size %r is not a power of 2')
1288 _(b'revlog chunk cache size %r is not a power of 2')
1249 % chunk_cache_size
1289 % chunk_cache_size
1250 )
1290 )
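The second check relies on a standard bit trick: for n > 0, `n & (n - 1)` clears the lowest set bit, so the result is zero exactly when n is a power of two. A standalone sketch:

    def is_power_of_two(n):
        # 65536 -> True, 65535 -> False, 0 -> False
        return n > 0 and n & (n - 1) == 0

    assert is_power_of_two(65536)
    assert not is_power_of_two(65535)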
1251 force_nodemap = opts.get(b'devel-force-nodemap', False)
1291 force_nodemap = opts.get(b'devel-force-nodemap', False)
1252 return new_header, mmapindexthreshold, force_nodemap
1292 return new_header, mmapindexthreshold, force_nodemap
1253
1293
1254 def _get_data(self, filepath, mmap_threshold, size=None):
1294 def _get_data(self, filepath, mmap_threshold, size=None):
1255 """return a file content with or without mmap
1295 """return a file content with or without mmap
1256
1296
1257 If the file is missing return the empty string"""
1297 If the file is missing return the empty string"""
1258 try:
1298 try:
1259 with self.opener(filepath) as fp:
1299 with self.opener(filepath) as fp:
1260 if mmap_threshold is not None:
1300 if mmap_threshold is not None:
1261 file_size = self.opener.fstat(fp).st_size
1301 file_size = self.opener.fstat(fp).st_size
1262 if file_size >= mmap_threshold:
1302 if file_size >= mmap_threshold:
1263 if size is not None:
1303 if size is not None:
1264 # avoid potential mmap crash
1304 # avoid potential mmap crash
1265 size = min(file_size, size)
1305 size = min(file_size, size)
1266 # TODO: should .close() to release resources without
1306 # TODO: should .close() to release resources without
1267 # relying on Python GC
1307 # relying on Python GC
1268 if size is None:
1308 if size is None:
1269 return util.buffer(util.mmapread(fp))
1309 return util.buffer(util.mmapread(fp))
1270 else:
1310 else:
1271 return util.buffer(util.mmapread(fp, size))
1311 return util.buffer(util.mmapread(fp, size))
1272 if size is None:
1312 if size is None:
1273 return fp.read()
1313 return fp.read()
1274 else:
1314 else:
1275 return fp.read(size)
1315 return fp.read(size)
1276 except FileNotFoundError:
1316 except FileNotFoundError:
1277 return b''
1317 return b''
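The same read-or-mmap policy can be expressed with only the standard library; a simplified stand-in for the opener/util helpers used above (`read_maybe_mmap` is a hypothetical name, and handling of missing files is omitted):

    import mmap
    import os

    def read_maybe_mmap(path, threshold, size=None):
        # Mirror of the policy above: mmap large files, plain-read small ones.
        with open(path, 'rb') as fp:
            file_size = os.fstat(fp.fileno()).st_size
            if threshold is not None and file_size >= threshold:
                length = file_size if size is None else min(file_size, size)
                return mmap.mmap(fp.fileno(), length, access=mmap.ACCESS_READ)
            return fp.read() if size is None else fp.read(size)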
1278
1318
1279 def get_streams(self, max_linkrev, force_inline=False):
1319 def get_streams(self, max_linkrev, force_inline=False):
1280 """return a list of streams that represent this revlog
1320 """return a list of streams that represent this revlog
1281
1321
1282 This is used by stream-clone to do byte-for-byte copies of a repository.
1322 This is used by stream-clone to do byte-for-byte copies of a repository.
1283
1323
1284 This streams data for all revisions that refer to a changelog revision up
1324 This streams data for all revisions that refer to a changelog revision up
1285 to `max_linkrev`.
1325 to `max_linkrev`.
1286
1326
1287 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1327 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1288
1328
1289 It returns a list of three-tuples:
1329 It returns a list of three-tuples:
1290
1330
1291 [
1331 [
1292 (filename, bytes_stream, stream_size),
1332 (filename, bytes_stream, stream_size),
1293 …
1333 …
1294 ]
1334 ]
1295 """
1335 """
1296 n = len(self)
1336 n = len(self)
1297 index = self.index
1337 index = self.index
1298 while n > 0:
1338 while n > 0:
1299 linkrev = index[n - 1][4]
1339 linkrev = index[n - 1][4]
1300 if linkrev < max_linkrev:
1340 if linkrev < max_linkrev:
1301 break
1341 break
1302 # note: this loop will rarely go through multiple iterations, since
1342 # note: this loop will rarely go through multiple iterations, since
1303 # it only traverses commits created during the current streaming
1343 # it only traverses commits created during the current streaming
1304 # pull operation.
1344 # pull operation.
1305 #
1345 #
1306 # If this becomes a problem, using a binary search should cap the
1346 # If this becomes a problem, using a binary search should cap the
1307 # runtime of this loop.
1347 # runtime of this loop.
1308 n = n - 1
1348 n = n - 1
1309 if n == 0:
1349 if n == 0:
1310 # no data to send
1350 # no data to send
1311 return []
1351 return []
1312 index_size = n * index.entry_size
1352 index_size = n * index.entry_size
1313 data_size = self.end(n - 1)
1353 data_size = self.end(n - 1)
1314
1354
1315 # XXX we might have been split (or stripped) since the object
1355 # XXX we might have been split (or stripped) since the object
1316 # initialization. We need to close this race too, probably by having
1356 # initialization. We need to close this race too, probably by having
1317 # a way to pre-open the files we feed to the revlog and never closing
1357 # a way to pre-open the files we feed to the revlog and never closing
1318 # them before we are done streaming.
1358 # them before we are done streaming.
1319
1359
1320 if self._inline:
1360 if self._inline:
1321
1361
1322 def get_stream():
1362 def get_stream():
1323 with self.opener(self._indexfile, mode=b"r") as fp:
1363 with self.opener(self._indexfile, mode=b"r") as fp:
1324 yield None
1364 yield None
1325 size = index_size + data_size
1365 size = index_size + data_size
1326 if size <= 65536:
1366 if size <= 65536:
1327 yield fp.read(size)
1367 yield fp.read(size)
1328 else:
1368 else:
1329 yield from util.filechunkiter(fp, limit=size)
1369 yield from util.filechunkiter(fp, limit=size)
1330
1370
1331 inline_stream = get_stream()
1371 inline_stream = get_stream()
1332 next(inline_stream)
1372 next(inline_stream)
1333 return [
1373 return [
1334 (self._indexfile, inline_stream, index_size + data_size),
1374 (self._indexfile, inline_stream, index_size + data_size),
1335 ]
1375 ]
1336 elif force_inline:
1376 elif force_inline:
1337
1377
1338 def get_stream():
1378 def get_stream():
1339 with self.reading():
1379 with self.reading():
1340 yield None
1380 yield None
1341
1381
1342 for rev in range(n):
1382 for rev in range(n):
1343 idx = self.index.entry_binary(rev)
1383 idx = self.index.entry_binary(rev)
1344 if rev == 0 and self._docket is None:
1384 if rev == 0 and self._docket is None:
1345 # re-inject the inline flag
1385 # re-inject the inline flag
1346 header = self._format_flags
1386 header = self._format_flags
1347 header |= self._format_version
1387 header |= self._format_version
1348 header |= FLAG_INLINE_DATA
1388 header |= FLAG_INLINE_DATA
1349 header = self.index.pack_header(header)
1389 header = self.index.pack_header(header)
1350 idx = header + idx
1390 idx = header + idx
1351 yield idx
1391 yield idx
1352 yield self._inner.get_segment_for_revs(rev, rev)[1]
1392 yield self._inner.get_segment_for_revs(rev, rev)[1]
1353
1393
1354 inline_stream = get_stream()
1394 inline_stream = get_stream()
1355 next(inline_stream)
1395 next(inline_stream)
1356 return [
1396 return [
1357 (self._indexfile, inline_stream, index_size + data_size),
1397 (self._indexfile, inline_stream, index_size + data_size),
1358 ]
1398 ]
1359 else:
1399 else:
1360
1400
1361 def get_index_stream():
1401 def get_index_stream():
1362 with self.opener(self._indexfile, mode=b"r") as fp:
1402 with self.opener(self._indexfile, mode=b"r") as fp:
1363 yield None
1403 yield None
1364 if index_size <= 65536:
1404 if index_size <= 65536:
1365 yield fp.read(index_size)
1405 yield fp.read(index_size)
1366 else:
1406 else:
1367 yield from util.filechunkiter(fp, limit=index_size)
1407 yield from util.filechunkiter(fp, limit=index_size)
1368
1408
1369 def get_data_stream():
1409 def get_data_stream():
1370 with self._datafp() as fp:
1410 with self._datafp() as fp:
1371 yield None
1411 yield None
1372 if data_size <= 65536:
1412 if data_size <= 65536:
1373 yield fp.read(data_size)
1413 yield fp.read(data_size)
1374 else:
1414 else:
1375 yield from util.filechunkiter(fp, limit=data_size)
1415 yield from util.filechunkiter(fp, limit=data_size)
1376
1416
1377 index_stream = get_index_stream()
1417 index_stream = get_index_stream()
1378 next(index_stream)
1418 next(index_stream)
1379 data_stream = get_data_stream()
1419 data_stream = get_data_stream()
1380 next(data_stream)
1420 next(data_stream)
1381 return [
1421 return [
1382 (self._datafile, data_stream, data_size),
1422 (self._datafile, data_stream, data_size),
1383 (self._indexfile, index_stream, index_size),
1423 (self._indexfile, index_stream, index_size),
1384 ]
1424 ]
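A hypothetical consumer of the streams returned above; `rl`, `max_linkrev`, and `dest_opener` stand in for whatever the stream-clone code actually uses:

    for name, stream, size in rl.get_streams(max_linkrev):
        with dest_opener(name, b'wb') as out:
            written = 0
            for chunk in stream:
                written += len(chunk)
                out.write(chunk)
            assert written == size  # each stream advertises its exact size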
1385
1425
1386 def _loadindex(self, docket=None):
1426 def _loadindex(self, docket=None):
1387
1427
1388 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1428 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1389
1429
1390 if self.postfix is not None:
1430 if self.postfix is not None:
1391 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1431 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1392 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1432 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1393 entry_point = b'%s.i.a' % self.radix
1433 entry_point = b'%s.i.a' % self.radix
1394 elif self._try_split and self.opener.exists(self._split_index_file):
1434 elif self._try_split and self.opener.exists(self._split_index_file):
1395 entry_point = self._split_index_file
1435 entry_point = self._split_index_file
1396 else:
1436 else:
1397 entry_point = b'%s.i' % self.radix
1437 entry_point = b'%s.i' % self.radix
1398
1438
1399 if docket is not None:
1439 if docket is not None:
1400 self._docket = docket
1440 self._docket = docket
1401 self._docket_file = entry_point
1441 self._docket_file = entry_point
1402 else:
1442 else:
1403 self._initempty = True
1443 self._initempty = True
1404 entry_data = self._get_data(entry_point, mmapindexthreshold)
1444 entry_data = self._get_data(entry_point, mmapindexthreshold)
1405 if len(entry_data) > 0:
1445 if len(entry_data) > 0:
1406 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1446 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1407 self._initempty = False
1447 self._initempty = False
1408 else:
1448 else:
1409 header = new_header
1449 header = new_header
1410
1450
1411 self._format_flags = header & ~0xFFFF
1451 self._format_flags = header & ~0xFFFF
1412 self._format_version = header & 0xFFFF
1452 self._format_version = header & 0xFFFF
1413
1453
1414 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1454 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1415 if supported_flags is None:
1455 if supported_flags is None:
1416 msg = _(b'unknown version (%d) in revlog %s')
1456 msg = _(b'unknown version (%d) in revlog %s')
1417 msg %= (self._format_version, self.display_id)
1457 msg %= (self._format_version, self.display_id)
1418 raise error.RevlogError(msg)
1458 raise error.RevlogError(msg)
1419 elif self._format_flags & ~supported_flags:
1459 elif self._format_flags & ~supported_flags:
1420 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1460 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1421 display_flag = self._format_flags >> 16
1461 display_flag = self._format_flags >> 16
1422 msg %= (display_flag, self._format_version, self.display_id)
1462 msg %= (display_flag, self._format_version, self.display_id)
1423 raise error.RevlogError(msg)
1463 raise error.RevlogError(msg)
1424
1464
1425 features = FEATURES_BY_VERSION[self._format_version]
1465 features = FEATURES_BY_VERSION[self._format_version]
1426 self._inline = features[b'inline'](self._format_flags)
1466 self._inline = features[b'inline'](self._format_flags)
1427 self.delta_config.general_delta = features[b'generaldelta'](
1467 self.delta_config.general_delta = features[b'generaldelta'](
1428 self._format_flags
1468 self._format_flags
1429 )
1469 )
1430 self.feature_config.has_side_data = features[b'sidedata']
1470 self.feature_config.has_side_data = features[b'sidedata']
1431
1471
1432 if not features[b'docket']:
1472 if not features[b'docket']:
1433 self._indexfile = entry_point
1473 self._indexfile = entry_point
1434 index_data = entry_data
1474 index_data = entry_data
1435 else:
1475 else:
1436 self._docket_file = entry_point
1476 self._docket_file = entry_point
1437 if self._initempty:
1477 if self._initempty:
1438 self._docket = docketutil.default_docket(self, header)
1478 self._docket = docketutil.default_docket(self, header)
1439 else:
1479 else:
1440 self._docket = docketutil.parse_docket(
1480 self._docket = docketutil.parse_docket(
1441 self, entry_data, use_pending=self._trypending
1481 self, entry_data, use_pending=self._trypending
1442 )
1482 )
1443
1483
1444 if self._docket is not None:
1484 if self._docket is not None:
1445 self._indexfile = self._docket.index_filepath()
1485 self._indexfile = self._docket.index_filepath()
1446 index_data = b''
1486 index_data = b''
1447 index_size = self._docket.index_end
1487 index_size = self._docket.index_end
1448 if index_size > 0:
1488 if index_size > 0:
1449 index_data = self._get_data(
1489 index_data = self._get_data(
1450 self._indexfile, mmapindexthreshold, size=index_size
1490 self._indexfile, mmapindexthreshold, size=index_size
1451 )
1491 )
1452 if len(index_data) < index_size:
1492 if len(index_data) < index_size:
1453 msg = _(b'too few index data for %s: got %d, expected %d')
1493 msg = _(b'too few index data for %s: got %d, expected %d')
1454 msg %= (self.display_id, len(index_data), index_size)
1494 msg %= (self.display_id, len(index_data), index_size)
1455 raise error.RevlogError(msg)
1495 raise error.RevlogError(msg)
1456
1496
1457 self._inline = False
1497 self._inline = False
1458 # generaldelta implied by version 2 revlogs.
1498 # generaldelta implied by version 2 revlogs.
1459 self.delta_config.general_delta = True
1499 self.delta_config.general_delta = True
1460 # the logic for persistent nodemap will be dealt with within the
1500 # the logic for persistent nodemap will be dealt with within the
1461 # main docket, so disable it for now.
1501 # main docket, so disable it for now.
1462 self._nodemap_file = None
1502 self._nodemap_file = None
1463
1503
1464 if self._docket is not None:
1504 if self._docket is not None:
1465 self._datafile = self._docket.data_filepath()
1505 self._datafile = self._docket.data_filepath()
1466 self._sidedatafile = self._docket.sidedata_filepath()
1506 self._sidedatafile = self._docket.sidedata_filepath()
1467 elif self.postfix is None:
1507 elif self.postfix is None:
1468 self._datafile = b'%s.d' % self.radix
1508 self._datafile = b'%s.d' % self.radix
1469 else:
1509 else:
1470 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1510 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1471
1511
1472 self.nodeconstants = sha1nodeconstants
1512 self.nodeconstants = sha1nodeconstants
1473 self.nullid = self.nodeconstants.nullid
1513 self.nullid = self.nodeconstants.nullid
1474
1514
1475 # sparse-revlog can't be on without general-delta (issue6056)
1515 # sparse-revlog can't be on without general-delta (issue6056)
1476 if not self.delta_config.general_delta:
1516 if not self.delta_config.general_delta:
1477 self.delta_config.sparse_revlog = False
1517 self.delta_config.sparse_revlog = False
1478
1518
1479 self._storedeltachains = True
1519 self._storedeltachains = True
1480
1520
1481 devel_nodemap = (
1521 devel_nodemap = (
1482 self._nodemap_file
1522 self._nodemap_file
1483 and force_nodemap
1523 and force_nodemap
1484 and parse_index_v1_nodemap is not None
1524 and parse_index_v1_nodemap is not None
1485 )
1525 )
1486
1526
1487 use_rust_index = False
1527 use_rust_index = False
1488 if rustrevlog is not None:
1528 if rustrevlog is not None:
1489 if self._nodemap_file is not None:
1529 if self._nodemap_file is not None:
1490 use_rust_index = True
1530 use_rust_index = True
1491 else:
1531 else:
1492 use_rust_index = self.opener.options.get(b'rust.index')
1532 use_rust_index = self.opener.options.get(b'rust.index')
1493
1533
1494 self._parse_index = parse_index_v1
1534 self._parse_index = parse_index_v1
1495 if self._format_version == REVLOGV0:
1535 if self._format_version == REVLOGV0:
1496 self._parse_index = revlogv0.parse_index_v0
1536 self._parse_index = revlogv0.parse_index_v0
1497 elif self._format_version == REVLOGV2:
1537 elif self._format_version == REVLOGV2:
1498 self._parse_index = parse_index_v2
1538 self._parse_index = parse_index_v2
1499 elif self._format_version == CHANGELOGV2:
1539 elif self._format_version == CHANGELOGV2:
1500 self._parse_index = parse_index_cl_v2
1540 self._parse_index = parse_index_cl_v2
1501 elif devel_nodemap:
1541 elif devel_nodemap:
1502 self._parse_index = parse_index_v1_nodemap
1542 self._parse_index = parse_index_v1_nodemap
1503 elif use_rust_index:
1543 elif use_rust_index:
1504 self._parse_index = parse_index_v1_mixed
1544 self._parse_index = parse_index_v1_mixed
1505 try:
1545 try:
1506 d = self._parse_index(index_data, self._inline)
1546 d = self._parse_index(index_data, self._inline)
1507 index, chunkcache = d
1547 index, chunkcache = d
1508 use_nodemap = (
1548 use_nodemap = (
1509 not self._inline
1549 not self._inline
1510 and self._nodemap_file is not None
1550 and self._nodemap_file is not None
1511 and hasattr(index, 'update_nodemap_data')
1551 and hasattr(index, 'update_nodemap_data')
1512 )
1552 )
1513 if use_nodemap:
1553 if use_nodemap:
1514 nodemap_data = nodemaputil.persisted_data(self)
1554 nodemap_data = nodemaputil.persisted_data(self)
1515 if nodemap_data is not None:
1555 if nodemap_data is not None:
1516 docket = nodemap_data[0]
1556 docket = nodemap_data[0]
1517 if (
1557 if (
1518 len(d[0]) > docket.tip_rev
1558 len(d[0]) > docket.tip_rev
1519 and d[0][docket.tip_rev][7] == docket.tip_node
1559 and d[0][docket.tip_rev][7] == docket.tip_node
1520 ):
1560 ):
1521 # no changelog tampering
1561 # no changelog tampering
1522 self._nodemap_docket = docket
1562 self._nodemap_docket = docket
1523 index.update_nodemap_data(*nodemap_data)
1563 index.update_nodemap_data(*nodemap_data)
1524 except (ValueError, IndexError):
1564 except (ValueError, IndexError):
1525 raise error.RevlogError(
1565 raise error.RevlogError(
1526 _(b"index %s is corrupted") % self.display_id
1566 _(b"index %s is corrupted") % self.display_id
1527 )
1567 )
1528 self.index = index
1568 self.index = index
1529 # revnum -> (chain-length, sum-delta-length)
1569 # revnum -> (chain-length, sum-delta-length)
1530 self._chaininfocache = util.lrucachedict(500)
1570 self._chaininfocache = util.lrucachedict(500)
1531
1571
1532 return chunkcache
1572 return chunkcache
1533
1573
1534 def _load_inner(self, chunk_cache):
1574 def _load_inner(self, chunk_cache):
1535 if self._docket is None:
1575 if self._docket is None:
1536 default_compression_header = None
1576 default_compression_header = None
1537 else:
1577 else:
1538 default_compression_header = self._docket.default_compression_header
1578 default_compression_header = self._docket.default_compression_header
1539
1579
1540 self._inner = _InnerRevlog(
1580 self._inner = _InnerRevlog(
1541 opener=self.opener,
1581 opener=self.opener,
1542 index=self.index,
1582 index=self.index,
1543 index_file=self._indexfile,
1583 index_file=self._indexfile,
1544 data_file=self._datafile,
1584 data_file=self._datafile,
1545 sidedata_file=self._sidedatafile,
1585 sidedata_file=self._sidedatafile,
1546 inline=self._inline,
1586 inline=self._inline,
1547 data_config=self.data_config,
1587 data_config=self.data_config,
1548 delta_config=self.delta_config,
1588 delta_config=self.delta_config,
1549 feature_config=self.feature_config,
1589 feature_config=self.feature_config,
1550 chunk_cache=chunk_cache,
1590 chunk_cache=chunk_cache,
1551 default_compression_header=default_compression_header,
1591 default_compression_header=default_compression_header,
1552 )
1592 )
1553
1593
1554 def get_revlog(self):
1594 def get_revlog(self):
1555 """simple function to mirror API of other not-really-revlog API"""
1595 """simple function to mirror API of other not-really-revlog API"""
1556 return self
1596 return self
1557
1597
1558 @util.propertycache
1598 @util.propertycache
1559 def revlog_kind(self):
1599 def revlog_kind(self):
1560 return self.target[0]
1600 return self.target[0]
1561
1601
1562 @util.propertycache
1602 @util.propertycache
1563 def display_id(self):
1603 def display_id(self):
1564 """The public facing "ID" of the revlog that we use in message"""
1604 """The public facing "ID" of the revlog that we use in message"""
1565 if self.revlog_kind == KIND_FILELOG:
1605 if self.revlog_kind == KIND_FILELOG:
1566 # Reference the file without the "data/" prefix, so it is familiar
1606 # Reference the file without the "data/" prefix, so it is familiar
1567 # to the user.
1607 # to the user.
1568 return self.target[1]
1608 return self.target[1]
1569 else:
1609 else:
1570 return self.radix
1610 return self.radix
1571
1611
1572 def _datafp(self, mode=b'r'):
1612 def _datafp(self, mode=b'r'):
1573 """file object for the revlog's data file"""
1613 """file object for the revlog's data file"""
1574 return self.opener(self._datafile, mode=mode)
1614 return self.opener(self._datafile, mode=mode)
1575
1615
1576 def tiprev(self):
1616 def tiprev(self):
1577 return len(self.index) - 1
1617 return len(self.index) - 1
1578
1618
1579 def tip(self):
1619 def tip(self):
1580 return self.node(self.tiprev())
1620 return self.node(self.tiprev())
1581
1621
1582 def __contains__(self, rev):
1622 def __contains__(self, rev):
1583 return 0 <= rev < len(self)
1623 return 0 <= rev < len(self)
1584
1624
1585 def __len__(self):
1625 def __len__(self):
1586 return len(self.index)
1626 return len(self.index)
1587
1627
1588 def __iter__(self):
1628 def __iter__(self):
1589 return iter(range(len(self)))
1629 return iter(range(len(self)))
1590
1630
1591 def revs(self, start=0, stop=None):
1631 def revs(self, start=0, stop=None):
1592 """iterate over all rev in this revlog (from start to stop)"""
1632 """iterate over all rev in this revlog (from start to stop)"""
1593 return storageutil.iterrevs(len(self), start=start, stop=stop)
1633 return storageutil.iterrevs(len(self), start=start, stop=stop)
1594
1634
1595 def hasnode(self, node):
1635 def hasnode(self, node):
1596 try:
1636 try:
1597 self.rev(node)
1637 self.rev(node)
1598 return True
1638 return True
1599 except KeyError:
1639 except KeyError:
1600 return False
1640 return False
1601
1641
1602 def _candelta(self, baserev, rev):
1642 def _candelta(self, baserev, rev):
1603 """whether two revisions (baserev, rev) can be delta-ed or not"""
1643 """whether two revisions (baserev, rev) can be delta-ed or not"""
1604 # Disable delta if either rev requires a content-changing flag
1644 # Disable delta if either rev requires a content-changing flag
1605 # processor (ex. LFS). This is because such flag processor can alter
1645 # processor (ex. LFS). This is because such flag processor can alter
1606 # the rawtext content that the delta will be based on, and two clients
1646 # the rawtext content that the delta will be based on, and two clients
1607 # could have a same revlog node with different flags (i.e. different
1647 # could have a same revlog node with different flags (i.e. different
1608 # rawtext contents) and the delta could be incompatible.
1648 # rawtext contents) and the delta could be incompatible.
1609 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1649 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1610 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1650 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1611 ):
1651 ):
1612 return False
1652 return False
1613 return True
1653 return True
1614
1654
1615 def update_caches(self, transaction):
1655 def update_caches(self, transaction):
1616 """update on disk cache
1656 """update on disk cache
1617
1657
1618 If a transaction is passed, the update may be delayed to transaction
1658 If a transaction is passed, the update may be delayed to transaction
1619 commit."""
1659 commit."""
1620 if self._nodemap_file is not None:
1660 if self._nodemap_file is not None:
1621 if transaction is None:
1661 if transaction is None:
1622 nodemaputil.update_persistent_nodemap(self)
1662 nodemaputil.update_persistent_nodemap(self)
1623 else:
1663 else:
1624 nodemaputil.setup_persistent_nodemap(transaction, self)
1664 nodemaputil.setup_persistent_nodemap(transaction, self)
1625
1665
1626 def clearcaches(self):
1666 def clearcaches(self):
1627 """Clear in-memory caches"""
1667 """Clear in-memory caches"""
1628 self._revisioncache = None
1668 self._revisioncache = None
1629 self._chainbasecache.clear()
1669 self._chainbasecache.clear()
1630 self._inner._segmentfile.clear_cache()
1670 self._inner._segmentfile.clear_cache()
1631 self._inner._segmentfile_sidedata.clear_cache()
1671 self._inner._segmentfile_sidedata.clear_cache()
1632 self._pcache = {}
1672 self._pcache = {}
1633 self._nodemap_docket = None
1673 self._nodemap_docket = None
1634 self.index.clearcaches()
1674 self.index.clearcaches()
1635 # The python code is the one responsible for validating the docket, so we
1675 # The python code is the one responsible for validating the docket, so we
1636 # end up having to refresh it here.
1676 # end up having to refresh it here.
1637 use_nodemap = (
1677 use_nodemap = (
1638 not self._inline
1678 not self._inline
1639 and self._nodemap_file is not None
1679 and self._nodemap_file is not None
1640 and hasattr(self.index, 'update_nodemap_data')
1680 and hasattr(self.index, 'update_nodemap_data')
1641 )
1681 )
1642 if use_nodemap:
1682 if use_nodemap:
1643 nodemap_data = nodemaputil.persisted_data(self)
1683 nodemap_data = nodemaputil.persisted_data(self)
1644 if nodemap_data is not None:
1684 if nodemap_data is not None:
1645 self._nodemap_docket = nodemap_data[0]
1685 self._nodemap_docket = nodemap_data[0]
1646 self.index.update_nodemap_data(*nodemap_data)
1686 self.index.update_nodemap_data(*nodemap_data)
1647
1687
1648 def rev(self, node):
1688 def rev(self, node):
1649 """return the revision number associated with a <nodeid>"""
1689 """return the revision number associated with a <nodeid>"""
1650 try:
1690 try:
1651 return self.index.rev(node)
1691 return self.index.rev(node)
1652 except TypeError:
1692 except TypeError:
1653 raise
1693 raise
1654 except error.RevlogError:
1694 except error.RevlogError:
1655 # parsers.c radix tree lookup failed
1695 # parsers.c radix tree lookup failed
1656 if (
1696 if (
1657 node == self.nodeconstants.wdirid
1697 node == self.nodeconstants.wdirid
1658 or node in self.nodeconstants.wdirfilenodeids
1698 or node in self.nodeconstants.wdirfilenodeids
1659 ):
1699 ):
1660 raise error.WdirUnsupported
1700 raise error.WdirUnsupported
1661 raise error.LookupError(node, self.display_id, _(b'no node'))
1701 raise error.LookupError(node, self.display_id, _(b'no node'))
1662
1702
1663 # Accessors for index entries.
1703 # Accessors for index entries.
1664
1704
1665 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1705 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1666 # are flags.
1706 # are flags.
1667 def start(self, rev):
1707 def start(self, rev):
1668 return int(self.index[rev][0] >> 16)
1708 return int(self.index[rev][0] >> 16)
1669
1709
1670 def sidedata_cut_off(self, rev):
1710 def sidedata_cut_off(self, rev):
1671 sd_cut_off = self.index[rev][8]
1711 sd_cut_off = self.index[rev][8]
1672 if sd_cut_off != 0:
1712 if sd_cut_off != 0:
1673 return sd_cut_off
1713 return sd_cut_off
1674 # This is some annoying dance, because entries without sidedata
1714 # This is some annoying dance, because entries without sidedata
1675 # currently use 0 as their offset. (instead of previous-offset +
1715 # currently use 0 as their offset. (instead of previous-offset +
1676 # previous-size)
1716 # previous-size)
1677 #
1717 #
1678 # We should reconsider this sidedata → 0 sidedata_offset policy.
1718 # We should reconsider this sidedata → 0 sidedata_offset policy.
1679 # In the meantime, we need this.
1719 # In the meantime, we need this.
1680 while 0 <= rev:
1720 while 0 <= rev:
1681 e = self.index[rev]
1721 e = self.index[rev]
1682 if e[9] != 0:
1722 if e[9] != 0:
1683 return e[8] + e[9]
1723 return e[8] + e[9]
1684 rev -= 1
1724 rev -= 1
1685 return 0
1725 return 0
1686
1726
1687 def flags(self, rev):
1727 def flags(self, rev):
1688 return self.index[rev][0] & 0xFFFF
1728 return self.index[rev][0] & 0xFFFF
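A worked example of the packed first index field described above, with the offset in the upper 48 bits and the flags in the lower 16:

    offset_flags = (1234 << 16) | 0x0001  # offset 1234, one flag bit set
    assert offset_flags >> 16 == 1234     # what start() extracts
    assert offset_flags & 0xFFFF == 0x1   # what flags() extracts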
1689
1729
1690 def length(self, rev):
1730 def length(self, rev):
1691 return self.index[rev][1]
1731 return self.index[rev][1]
1692
1732
1693 def sidedata_length(self, rev):
1733 def sidedata_length(self, rev):
1694 if not self.feature_config.has_side_data:
1734 if not self.feature_config.has_side_data:
1695 return 0
1735 return 0
1696 return self.index[rev][9]
1736 return self.index[rev][9]
1697
1737
1698 def rawsize(self, rev):
1738 def rawsize(self, rev):
1699 """return the length of the uncompressed text for a given revision"""
1739 """return the length of the uncompressed text for a given revision"""
1700 l = self.index[rev][2]
1740 l = self.index[rev][2]
1701 if l >= 0:
1741 if l >= 0:
1702 return l
1742 return l
1703
1743
1704 t = self.rawdata(rev)
1744 t = self.rawdata(rev)
1705 return len(t)
1745 return len(t)
1706
1746
1707 def size(self, rev):
1747 def size(self, rev):
1708 """length of non-raw text (processed by a "read" flag processor)"""
1748 """length of non-raw text (processed by a "read" flag processor)"""
1709 # fast path: if no "read" flag processor could change the content,
1749 # fast path: if no "read" flag processor could change the content,
1710 # size is rawsize. note: ELLIPSIS is known to not change the content.
1750 # size is rawsize. note: ELLIPSIS is known to not change the content.
1711 flags = self.flags(rev)
1751 flags = self.flags(rev)
1712 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1752 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1713 return self.rawsize(rev)
1753 return self.rawsize(rev)
1714
1754
1715 return len(self.revision(rev))
1755 return len(self.revision(rev))
1716
1756
1717 def fast_rank(self, rev):
1757 def fast_rank(self, rev):
1718 """Return the rank of a revision if already known, or None otherwise.
1758 """Return the rank of a revision if already known, or None otherwise.
1719
1759
1720 The rank of a revision is the size of the sub-graph it defines as a
1760 The rank of a revision is the size of the sub-graph it defines as a
1721 head. Equivalently, the rank of a revision `r` is the size of the set
1761 head. Equivalently, the rank of a revision `r` is the size of the set
1722 `ancestors(r)`, `r` included.
1762 `ancestors(r)`, `r` included.
1723
1763
1724 This method returns the rank retrieved from the revlog in constant
1764 This method returns the rank retrieved from the revlog in constant
1725 time. It makes no attempt at computing unknown values for versions of
1765 time. It makes no attempt at computing unknown values for versions of
1726 the revlog which do not persist the rank.
1766 the revlog which do not persist the rank.
1727 """
1767 """
1728 rank = self.index[rev][ENTRY_RANK]
1768 rank = self.index[rev][ENTRY_RANK]
1729 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1769 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1730 return None
1770 return None
1731 if rev == nullrev:
1771 if rev == nullrev:
1732 return 0 # convention
1772 return 0 # convention
1733 return rank
1773 return rank
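To make the definition concrete, a toy rank computation over an explicit parent map (independent of any revlog version):

    parents = {0: [], 1: [0], 2: [1]}  # a linear history 0 <- 1 <- 2

    def rank(r):
        seen, stack = set(), [r]
        while stack:
            n = stack.pop()
            if n not in seen:
                seen.add(n)
                stack.extend(parents[n])
        return len(seen)

    assert rank(2) == 3  # ancestors(2) = {0, 1, 2}, with 2 included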
1734
1774
1735 def chainbase(self, rev):
1775 def chainbase(self, rev):
1736 base = self._chainbasecache.get(rev)
1776 base = self._chainbasecache.get(rev)
1737 if base is not None:
1777 if base is not None:
1738 return base
1778 return base
1739
1779
1740 index = self.index
1780 index = self.index
1741 iterrev = rev
1781 iterrev = rev
1742 base = index[iterrev][3]
1782 base = index[iterrev][3]
1743 while base != iterrev:
1783 while base != iterrev:
1744 iterrev = base
1784 iterrev = base
1745 base = index[iterrev][3]
1785 base = index[iterrev][3]
1746
1786
1747 self._chainbasecache[rev] = base
1787 self._chainbasecache[rev] = base
1748 return base
1788 return base
1749
1789
1750 def linkrev(self, rev):
1790 def linkrev(self, rev):
1751 return self.index[rev][4]
1791 return self.index[rev][4]
1752
1792
1753 def parentrevs(self, rev):
1793 def parentrevs(self, rev):
1754 try:
1794 try:
1755 entry = self.index[rev]
1795 entry = self.index[rev]
1756 except IndexError:
1796 except IndexError:
1757 if rev == wdirrev:
1797 if rev == wdirrev:
1758 raise error.WdirUnsupported
1798 raise error.WdirUnsupported
1759 raise
1799 raise
1760
1800
1761 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1801 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1762 return entry[6], entry[5]
1802 return entry[6], entry[5]
1763 else:
1803 else:
1764 return entry[5], entry[6]
1804 return entry[5], entry[6]
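The swap above implements the canonical-parent-order convention: when the first parent slot holds nullrev, the non-null parent is reported first. In isolation:

    nullrev = -1

    def canonical(p1, p2):
        return (p2, p1) if p1 == nullrev else (p1, p2)

    assert canonical(nullrev, 5) == (5, nullrev)
    assert canonical(4, 5) == (4, 5)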
1765
1805
1766 # fast parentrevs(rev) where rev isn't filtered
1806 # fast parentrevs(rev) where rev isn't filtered
1767 _uncheckedparentrevs = parentrevs
1807 _uncheckedparentrevs = parentrevs
1768
1808
1769 def node(self, rev):
1809 def node(self, rev):
1770 try:
1810 try:
1771 return self.index[rev][7]
1811 return self.index[rev][7]
1772 except IndexError:
1812 except IndexError:
1773 if rev == wdirrev:
1813 if rev == wdirrev:
1774 raise error.WdirUnsupported
1814 raise error.WdirUnsupported
1775 raise
1815 raise
1776
1816
1777 # Derived from index values.
1817 # Derived from index values.
1778
1818
1779 def end(self, rev):
1819 def end(self, rev):
1780 return self.start(rev) + self.length(rev)
1820 return self.start(rev) + self.length(rev)
1781
1821
1782 def parents(self, node):
1822 def parents(self, node):
1783 i = self.index
1823 i = self.index
1784 d = i[self.rev(node)]
1824 d = i[self.rev(node)]
1785 # inline node() to avoid function call overhead
1825 # inline node() to avoid function call overhead
1786 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
1826 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
1787 return i[d[6]][7], i[d[5]][7]
1827 return i[d[6]][7], i[d[5]][7]
1788 else:
1828 else:
1789 return i[d[5]][7], i[d[6]][7]
1829 return i[d[5]][7], i[d[6]][7]
1790
1830
1791 def chainlen(self, rev):
1831 def chainlen(self, rev):
1792 return self._chaininfo(rev)[0]
1832 return self._chaininfo(rev)[0]
1793
1833
1794 def _chaininfo(self, rev):
1834 def _chaininfo(self, rev):
1795 chaininfocache = self._chaininfocache
1835 chaininfocache = self._chaininfocache
1796 if rev in chaininfocache:
1836 if rev in chaininfocache:
1797 return chaininfocache[rev]
1837 return chaininfocache[rev]
1798 index = self.index
1838 index = self.index
1799 generaldelta = self.delta_config.general_delta
1839 generaldelta = self.delta_config.general_delta
1800 iterrev = rev
1840 iterrev = rev
1801 e = index[iterrev]
1841 e = index[iterrev]
1802 clen = 0
1842 clen = 0
1803 compresseddeltalen = 0
1843 compresseddeltalen = 0
1804 while iterrev != e[3]:
1844 while iterrev != e[3]:
1805 clen += 1
1845 clen += 1
1806 compresseddeltalen += e[1]
1846 compresseddeltalen += e[1]
1807 if generaldelta:
1847 if generaldelta:
1808 iterrev = e[3]
1848 iterrev = e[3]
1809 else:
1849 else:
1810 iterrev -= 1
1850 iterrev -= 1
1811 if iterrev in chaininfocache:
1851 if iterrev in chaininfocache:
1812 t = chaininfocache[iterrev]
1852 t = chaininfocache[iterrev]
1813 clen += t[0]
1853 clen += t[0]
1814 compresseddeltalen += t[1]
1854 compresseddeltalen += t[1]
1815 break
1855 break
1816 e = index[iterrev]
1856 e = index[iterrev]
1817 else:
1857 else:
1818 # Add text length of base since decompressing that also takes
1858 # Add text length of base since decompressing that also takes
1819 # work. For cache hits the length is already included.
1859 # work. For cache hits the length is already included.
1820 compresseddeltalen += e[1]
1860 compresseddeltalen += e[1]
1821 r = (clen, compresseddeltalen)
1861 r = (clen, compresseddeltalen)
1822 chaininfocache[rev] = r
1862 chaininfocache[rev] = r
1823 return r
1863 return r
1824
1864
1825 def _deltachain(self, rev, stoprev=None):
1826 """Obtain the delta chain for a revision.
1827
1828 ``stoprev`` specifies a revision to stop at. If not specified, we
1829 stop at the base of the chain.
1830
1831 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1832 revs in ascending order and ``stopped`` is a bool indicating whether
1833 ``stoprev`` was hit.
1834 """
1835 generaldelta = self.delta_config.general_delta
1836 # Try C implementation.
1837 try:
1838 return self.index.deltachain(rev, stoprev, generaldelta)
1839 except AttributeError:
1840 pass
1841
1842 chain = []
1843
1844 # Alias to prevent attribute lookup in tight loop.
1845 index = self.index
1846
1847 iterrev = rev
1848 e = index[iterrev]
1849 while iterrev != e[3] and iterrev != stoprev:
1850 chain.append(iterrev)
1851 if generaldelta:
1852 iterrev = e[3]
1853 else:
1854 iterrev -= 1
1855 e = index[iterrev]
1856
1857 if iterrev == stoprev:
1858 stopped = True
1859 else:
1860 chain.append(iterrev)
1861 stopped = False
1862
1863 chain.reverse()
1864 return chain, stopped
1865
1865 def _deltachain(self, rev, stoprev=None):
1866 return self._inner._deltachain(rev, stoprev=stoprev)
1867
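In miniature, the chain walk that just moved to the inner object looks like this; `base[r]` plays the role of index entry slot 3 (the delta base), and a revision that is its own base is a full snapshot:

    base = {0: 0, 1: 0, 2: 1, 3: 2}

    def deltachain(rev):
        chain = []
        while base[rev] != rev:
            chain.append(rev)
            rev = base[rev]
        chain.append(rev)    # include the chain base itself
        chain.reverse()      # ascending order, as the docstring promises
        return chain

    assert deltachain(3) == [0, 1, 2, 3]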
1866 def ancestors(self, revs, stoprev=0, inclusive=False):
1868 def ancestors(self, revs, stoprev=0, inclusive=False):
1867 """Generate the ancestors of 'revs' in reverse revision order.
1869 """Generate the ancestors of 'revs' in reverse revision order.
1868 Does not generate revs lower than stoprev.
1870 Does not generate revs lower than stoprev.
1869
1871
1870 See the documentation for ancestor.lazyancestors for more details."""
1872 See the documentation for ancestor.lazyancestors for more details."""
1871
1873
1872 # first, make sure start revisions aren't filtered
1874 # first, make sure start revisions aren't filtered
1873 revs = list(revs)
1875 revs = list(revs)
1874 checkrev = self.node
1876 checkrev = self.node
1875 for r in revs:
1877 for r in revs:
1876 checkrev(r)
1878 checkrev(r)
1877 # and we're sure ancestors aren't filtered as well
1879 # and we're sure ancestors aren't filtered as well
1878
1880
1879 if rustancestor is not None and self.index.rust_ext_compat:
1881 if rustancestor is not None and self.index.rust_ext_compat:
1880 lazyancestors = rustancestor.LazyAncestors
1882 lazyancestors = rustancestor.LazyAncestors
1881 arg = self.index
1883 arg = self.index
1882 else:
1884 else:
1883 lazyancestors = ancestor.lazyancestors
1885 lazyancestors = ancestor.lazyancestors
1884 arg = self._uncheckedparentrevs
1886 arg = self._uncheckedparentrevs
1885 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1887 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1886
1888
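# Editor's sketch (not from the changeset) of the lazyancestors contract used
# above: ancestors of `revs` come out in reverse revision order, nothing
# below `stoprev`, and the starting revs themselves only when `inclusive` is
# set. Toy version over a hypothetical `parentrevs` table ((-1, -1) = null).

def toy_ancestors(parentrevs, revs, stoprev=0, inclusive=False):
    pending = set(revs)
    if not inclusive:
        pending = set()
        for r in revs:
            pending.update(p for p in parentrevs[r] if p >= stoprev)
    while pending:
        r = max(pending)  # largest pending rev first: reverse revision order
        pending.remove(r)
        yield r
        pending.update(p for p in parentrevs[r] if p >= stoprev)

parentrevs = [(-1, -1), (0, -1), (1, -1), (1, -1)]
assert list(toy_ancestors(parentrevs, [3])) == [1, 0]
assert list(toy_ancestors(parentrevs, [2, 3], inclusive=True)) == [3, 2, 1, 0]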
1887 def descendants(self, revs):
1889 def descendants(self, revs):
1888 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1890 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1889
1891
1890 def findcommonmissing(self, common=None, heads=None):
1892 def findcommonmissing(self, common=None, heads=None):
1891 """Return a tuple of the ancestors of common and the ancestors of heads
1893 """Return a tuple of the ancestors of common and the ancestors of heads
1892 that are not ancestors of common. In revset terminology, we return the
1894 that are not ancestors of common. In revset terminology, we return the
1893 tuple:
1895 tuple:
1894
1896
1895 ::common, (::heads) - (::common)
1897 ::common, (::heads) - (::common)
1896
1898
1897 The list is sorted by revision number, meaning it is
1899 The list is sorted by revision number, meaning it is
1898 topologically sorted.
1900 topologically sorted.
1899
1901
1900 'heads' and 'common' are both lists of node IDs. If heads is
1902 'heads' and 'common' are both lists of node IDs. If heads is
1901 not supplied, uses all of the revlog's heads. If common is not
1903 not supplied, uses all of the revlog's heads. If common is not
1902 supplied, uses nullid."""
1904 supplied, uses nullid."""
1903 if common is None:
1905 if common is None:
1904 common = [self.nullid]
1906 common = [self.nullid]
1905 if heads is None:
1907 if heads is None:
1906 heads = self.heads()
1908 heads = self.heads()
1907
1909
1908 common = [self.rev(n) for n in common]
1910 common = [self.rev(n) for n in common]
1909 heads = [self.rev(n) for n in heads]
1911 heads = [self.rev(n) for n in heads]
1910
1912
1911 # we want the ancestors, but inclusive
1913 # we want the ancestors, but inclusive
1912 class lazyset:
1914 class lazyset:
1913 def __init__(self, lazyvalues):
1915 def __init__(self, lazyvalues):
1914 self.addedvalues = set()
1916 self.addedvalues = set()
1915 self.lazyvalues = lazyvalues
1917 self.lazyvalues = lazyvalues
1916
1918
1917 def __contains__(self, value):
1919 def __contains__(self, value):
1918 return value in self.addedvalues or value in self.lazyvalues
1920 return value in self.addedvalues or value in self.lazyvalues
1919
1921
1920 def __iter__(self):
1922 def __iter__(self):
1921 added = self.addedvalues
1923 added = self.addedvalues
1922 for r in added:
1924 for r in added:
1923 yield r
1925 yield r
1924 for r in self.lazyvalues:
1926 for r in self.lazyvalues:
1925 if r not in added:
1927 if r not in added:
1926 yield r
1928 yield r
1927
1929
1928 def add(self, value):
1930 def add(self, value):
1929 self.addedvalues.add(value)
1931 self.addedvalues.add(value)
1930
1932
1931 def update(self, values):
1933 def update(self, values):
1932 self.addedvalues.update(values)
1934 self.addedvalues.update(values)
1933
1935
1934 has = lazyset(self.ancestors(common))
1936 has = lazyset(self.ancestors(common))
1935 has.add(nullrev)
1937 has.add(nullrev)
1936 has.update(common)
1938 has.update(common)
1937
1939
1938 # take all ancestors from heads that aren't in has
1940 # take all ancestors from heads that aren't in has
1939 missing = set()
1941 missing = set()
1940 visit = collections.deque(r for r in heads if r not in has)
1942 visit = collections.deque(r for r in heads if r not in has)
1941 while visit:
1943 while visit:
1942 r = visit.popleft()
1944 r = visit.popleft()
1943 if r in missing:
1945 if r in missing:
1944 continue
1946 continue
1945 else:
1947 else:
1946 missing.add(r)
1948 missing.add(r)
1947 for p in self.parentrevs(r):
1949 for p in self.parentrevs(r):
1948 if p not in has:
1950 if p not in has:
1949 visit.append(p)
1951 visit.append(p)
1950 missing = list(missing)
1952 missing = list(missing)
1951 missing.sort()
1953 missing.sort()
1952 return has, [self.node(miss) for miss in missing]
1954 return has, [self.node(miss) for miss in missing]
1953
1955
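# Editor's sketch (not from the changeset) of the set algebra above: `has` is
# ::common (inclusive) and `missing` is (::heads) - (::common), returned
# sorted by revision number. Toy version over a hypothetical parentrevs
# table, with -1 standing in for nullrev.

def toy_findcommonmissing(parentrevs, common, heads):
    def closure(revs):
        seen = set()
        stack = list(revs)
        while stack:
            r = stack.pop()
            if r >= 0 and r not in seen:
                seen.add(r)
                stack.extend(parentrevs[r])
        return seen

    has = closure(common)
    missing = sorted(closure(heads) - has)
    return has, missing

#        0 <- 1 <- 2        (common = [1])
#              \-- 3 <- 4   (heads = [2, 4])
parentrevs = [(-1, -1), (0, -1), (1, -1), (1, -1), (3, -1)]
assert toy_findcommonmissing(parentrevs, [1], [2, 4]) == ({0, 1}, [2, 3, 4])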
1954 def incrementalmissingrevs(self, common=None):
1956 def incrementalmissingrevs(self, common=None):
1955 """Return an object that can be used to incrementally compute the
1957 """Return an object that can be used to incrementally compute the
1956 revision numbers of the ancestors of arbitrary sets that are not
1958 revision numbers of the ancestors of arbitrary sets that are not
1957 ancestors of common. This is an ancestor.incrementalmissingancestors
1959 ancestors of common. This is an ancestor.incrementalmissingancestors
1958 object.
1960 object.
1959
1961
1960 'common' is a list of revision numbers. If common is not supplied, uses
1962 'common' is a list of revision numbers. If common is not supplied, uses
1961 nullrev.
1963 nullrev.
1962 """
1964 """
1963 if common is None:
1965 if common is None:
1964 common = [nullrev]
1966 common = [nullrev]
1965
1967
1966 if rustancestor is not None and self.index.rust_ext_compat:
1968 if rustancestor is not None and self.index.rust_ext_compat:
1967 return rustancestor.MissingAncestors(self.index, common)
1969 return rustancestor.MissingAncestors(self.index, common)
1968 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1970 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1969
1971
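# Editor's sketch (not from the changeset) of the interface this returns:
# the bases can grow over time and each missingancestors() call reports the
# ancestors of `heads` not below any base. This toy recomputes the closure
# on every call, whereas the real object works incrementally; all names are
# hypothetical.

class ToyIncrementalMissing:
    def __init__(self, parentrevs, bases):
        self._parentrevs = parentrevs
        self._bases = set(bases)

    def addbases(self, newbases):
        self._bases.update(newbases)

    def _closure(self, revs):
        seen = set()
        stack = [r for r in revs if r >= 0]
        while stack:
            r = stack.pop()
            if r not in seen:
                seen.add(r)
                stack.extend(p for p in self._parentrevs[r] if p >= 0)
        return seen

    def missingancestors(self, heads):
        return sorted(self._closure(heads) - self._closure(self._bases))

parentrevs = [(-1, -1), (0, -1), (1, -1), (1, -1), (3, -1)]
inc = ToyIncrementalMissing(parentrevs, [1])
assert inc.missingancestors([4]) == [3, 4]
inc.addbases([3])
assert inc.missingancestors([2]) == [2]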
1970 def findmissingrevs(self, common=None, heads=None):
1972 def findmissingrevs(self, common=None, heads=None):
1971 """Return the revision numbers of the ancestors of heads that
1973 """Return the revision numbers of the ancestors of heads that
1972 are not ancestors of common.
1974 are not ancestors of common.
1973
1975
1974 More specifically, return a list of revision numbers corresponding to
1976 More specifically, return a list of revision numbers corresponding to
1975 nodes N such that every N satisfies the following constraints:
1977 nodes N such that every N satisfies the following constraints:
1976
1978
1977 1. N is an ancestor of some node in 'heads'
1979 1. N is an ancestor of some node in 'heads'
1978 2. N is not an ancestor of any node in 'common'
1980 2. N is not an ancestor of any node in 'common'
1979
1981
1980 The list is sorted by revision number, meaning it is
1982 The list is sorted by revision number, meaning it is
1981 topologically sorted.
1983 topologically sorted.
1982
1984
1983 'heads' and 'common' are both lists of revision numbers. If heads is
1985 'heads' and 'common' are both lists of revision numbers. If heads is
1984 not supplied, uses all of the revlog's heads. If common is not
1986 not supplied, uses all of the revlog's heads. If common is not
1985 supplied, uses nullid."""
1987 supplied, uses nullid."""
1986 if common is None:
1988 if common is None:
1987 common = [nullrev]
1989 common = [nullrev]
1988 if heads is None:
1990 if heads is None:
1989 heads = self.headrevs()
1991 heads = self.headrevs()
1990
1992
1991 inc = self.incrementalmissingrevs(common=common)
1993 inc = self.incrementalmissingrevs(common=common)
1992 return inc.missingancestors(heads)
1994 return inc.missingancestors(heads)
1993
1995
1994 def findmissing(self, common=None, heads=None):
1996 def findmissing(self, common=None, heads=None):
1995 """Return the ancestors of heads that are not ancestors of common.
1997 """Return the ancestors of heads that are not ancestors of common.
1996
1998
1997 More specifically, return a list of nodes N such that every N
1999 More specifically, return a list of nodes N such that every N
1998 satisfies the following constraints:
2000 satisfies the following constraints:
1999
2001
2000 1. N is an ancestor of some node in 'heads'
2002 1. N is an ancestor of some node in 'heads'
2001 2. N is not an ancestor of any node in 'common'
2003 2. N is not an ancestor of any node in 'common'
2002
2004
2003 The list is sorted by revision number, meaning it is
2005 The list is sorted by revision number, meaning it is
2004 topologically sorted.
2006 topologically sorted.
2005
2007
2006 'heads' and 'common' are both lists of node IDs. If heads is
2008 'heads' and 'common' are both lists of node IDs. If heads is
2007 not supplied, uses all of the revlog's heads. If common is not
2009 not supplied, uses all of the revlog's heads. If common is not
2008 supplied, uses nullid."""
2010 supplied, uses nullid."""
2009 if common is None:
2011 if common is None:
2010 common = [self.nullid]
2012 common = [self.nullid]
2011 if heads is None:
2013 if heads is None:
2012 heads = self.heads()
2014 heads = self.heads()
2013
2015
2014 common = [self.rev(n) for n in common]
2016 common = [self.rev(n) for n in common]
2015 heads = [self.rev(n) for n in heads]
2017 heads = [self.rev(n) for n in heads]
2016
2018
2017 inc = self.incrementalmissingrevs(common=common)
2019 inc = self.incrementalmissingrevs(common=common)
2018 return [self.node(r) for r in inc.missingancestors(heads)]
2020 return [self.node(r) for r in inc.missingancestors(heads)]
2019
2021
2020 def nodesbetween(self, roots=None, heads=None):
2022 def nodesbetween(self, roots=None, heads=None):
2021 """Return a topological path from 'roots' to 'heads'.
2023 """Return a topological path from 'roots' to 'heads'.
2022
2024
2023 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2025 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2024 topologically sorted list of all nodes N that satisfy both of
2026 topologically sorted list of all nodes N that satisfy both of
2025 these constraints:
2027 these constraints:
2026
2028
2027 1. N is a descendant of some node in 'roots'
2029 1. N is a descendant of some node in 'roots'
2028 2. N is an ancestor of some node in 'heads'
2030 2. N is an ancestor of some node in 'heads'
2029
2031
2030 Every node is considered to be both a descendant and an ancestor
2032 Every node is considered to be both a descendant and an ancestor
2031 of itself, so every reachable node in 'roots' and 'heads' will be
2033 of itself, so every reachable node in 'roots' and 'heads' will be
2032 included in 'nodes'.
2034 included in 'nodes'.
2033
2035
2034 'outroots' is the list of reachable nodes in 'roots', i.e., the
2036 'outroots' is the list of reachable nodes in 'roots', i.e., the
2035 subset of 'roots' that is returned in 'nodes'. Likewise,
2037 subset of 'roots' that is returned in 'nodes'. Likewise,
2036 'outheads' is the subset of 'heads' that is also in 'nodes'.
2038 'outheads' is the subset of 'heads' that is also in 'nodes'.
2037
2039
2038 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2040 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2039 unspecified, uses nullid as the only root. If 'heads' is
2041 unspecified, uses nullid as the only root. If 'heads' is
2040 unspecified, uses list of all of the revlog's heads."""
2042 unspecified, uses list of all of the revlog's heads."""
2041 nonodes = ([], [], [])
2043 nonodes = ([], [], [])
2042 if roots is not None:
2044 if roots is not None:
2043 roots = list(roots)
2045 roots = list(roots)
2044 if not roots:
2046 if not roots:
2045 return nonodes
2047 return nonodes
2046 lowestrev = min([self.rev(n) for n in roots])
2048 lowestrev = min([self.rev(n) for n in roots])
2047 else:
2049 else:
2048 roots = [self.nullid] # Everybody's a descendant of nullid
2050 roots = [self.nullid] # Everybody's a descendant of nullid
2049 lowestrev = nullrev
2051 lowestrev = nullrev
2050 if (lowestrev == nullrev) and (heads is None):
2052 if (lowestrev == nullrev) and (heads is None):
2051 # We want _all_ the nodes!
2053 # We want _all_ the nodes!
2052 return (
2054 return (
2053 [self.node(r) for r in self],
2055 [self.node(r) for r in self],
2054 [self.nullid],
2056 [self.nullid],
2055 list(self.heads()),
2057 list(self.heads()),
2056 )
2058 )
2057 if heads is None:
2059 if heads is None:
2058 # All nodes are ancestors, so the latest ancestor is the last
2060 # All nodes are ancestors, so the latest ancestor is the last
2059 # node.
2061 # node.
2060 highestrev = len(self) - 1
2062 highestrev = len(self) - 1
2061 # Set ancestors to None to signal that every node is an ancestor.
2063 # Set ancestors to None to signal that every node is an ancestor.
2062 ancestors = None
2064 ancestors = None
2063 # Set heads to an empty dictionary for later discovery of heads
2065 # Set heads to an empty dictionary for later discovery of heads
2064 heads = {}
2066 heads = {}
2065 else:
2067 else:
2066 heads = list(heads)
2068 heads = list(heads)
2067 if not heads:
2069 if not heads:
2068 return nonodes
2070 return nonodes
2069 ancestors = set()
2071 ancestors = set()
2070 # Turn heads into a dictionary so we can remove 'fake' heads.
2072 # Turn heads into a dictionary so we can remove 'fake' heads.
2071 # Also, later we will be using it to filter out the heads we can't
2073 # Also, later we will be using it to filter out the heads we can't
2072 # find from roots.
2074 # find from roots.
2073 heads = dict.fromkeys(heads, False)
2075 heads = dict.fromkeys(heads, False)
2074 # Start at the top and keep marking parents until we're done.
2076 # Start at the top and keep marking parents until we're done.
2075 nodestotag = set(heads)
2077 nodestotag = set(heads)
2076 # Remember where the top was so we can use it as a limit later.
2078 # Remember where the top was so we can use it as a limit later.
2077 highestrev = max([self.rev(n) for n in nodestotag])
2079 highestrev = max([self.rev(n) for n in nodestotag])
2078 while nodestotag:
2080 while nodestotag:
2079 # grab a node to tag
2081 # grab a node to tag
2080 n = nodestotag.pop()
2082 n = nodestotag.pop()
2081 # Never tag nullid
2083 # Never tag nullid
2082 if n == self.nullid:
2084 if n == self.nullid:
2083 continue
2085 continue
2084 # A node's revision number represents its place in a
2086 # A node's revision number represents its place in a
2085 # topologically sorted list of nodes.
2087 # topologically sorted list of nodes.
2086 r = self.rev(n)
2088 r = self.rev(n)
2087 if r >= lowestrev:
2089 if r >= lowestrev:
2088 if n not in ancestors:
2090 if n not in ancestors:
2089 # If we are possibly a descendant of one of the roots
2091 # If we are possibly a descendant of one of the roots
2090 # and we haven't already been marked as an ancestor
2092 # and we haven't already been marked as an ancestor
2091 ancestors.add(n) # Mark as ancestor
2093 ancestors.add(n) # Mark as ancestor
2092 # Add non-nullid parents to list of nodes to tag.
2094 # Add non-nullid parents to list of nodes to tag.
2093 nodestotag.update(
2095 nodestotag.update(
2094 [p for p in self.parents(n) if p != self.nullid]
2096 [p for p in self.parents(n) if p != self.nullid]
2095 )
2097 )
2096 elif n in heads: # We've seen it before, is it a fake head?
2098 elif n in heads: # We've seen it before, is it a fake head?
2097 # So it is, real heads should not be the ancestors of
2099 # So it is, real heads should not be the ancestors of
2098 # any other heads.
2100 # any other heads.
2099 heads.pop(n)
2101 heads.pop(n)
2100 if not ancestors:
2102 if not ancestors:
2101 return nonodes
2103 return nonodes
2102 # Now that we have our set of ancestors, we want to remove any
2104 # Now that we have our set of ancestors, we want to remove any
2103 # roots that are not ancestors.
2105 # roots that are not ancestors.
2104
2106
2105 # If one of the roots was nullid, everything is included anyway.
2107 # If one of the roots was nullid, everything is included anyway.
2106 if lowestrev > nullrev:
2108 if lowestrev > nullrev:
2107 # But, since we weren't, let's recompute the lowest rev to not
2109 # But, since we weren't, let's recompute the lowest rev to not
2108 # include roots that aren't ancestors.
2110 # include roots that aren't ancestors.
2109
2111
2110 # Filter out roots that aren't ancestors of heads
2112 # Filter out roots that aren't ancestors of heads
2111 roots = [root for root in roots if root in ancestors]
2113 roots = [root for root in roots if root in ancestors]
2112 # Recompute the lowest revision
2114 # Recompute the lowest revision
2113 if roots:
2115 if roots:
2114 lowestrev = min([self.rev(root) for root in roots])
2116 lowestrev = min([self.rev(root) for root in roots])
2115 else:
2117 else:
2116 # No more roots? Return empty list
2118 # No more roots? Return empty list
2117 return nonodes
2119 return nonodes
2118 else:
2120 else:
2119 # We are descending from nullid, and don't need to care about
2121 # We are descending from nullid, and don't need to care about
2120 # any other roots.
2122 # any other roots.
2121 lowestrev = nullrev
2123 lowestrev = nullrev
2122 roots = [self.nullid]
2124 roots = [self.nullid]
2123 # Transform our roots list into a set.
2125 # Transform our roots list into a set.
2124 descendants = set(roots)
2126 descendants = set(roots)
2125 # Also, keep the original roots so we can filter out roots that aren't
2127 # Also, keep the original roots so we can filter out roots that aren't
2126 # 'real' roots (i.e. are descended from other roots).
2128 # 'real' roots (i.e. are descended from other roots).
2127 roots = descendants.copy()
2129 roots = descendants.copy()
2128 # Our topologically sorted list of output nodes.
2130 # Our topologically sorted list of output nodes.
2129 orderedout = []
2131 orderedout = []
2130 # Don't start at nullid since we don't want nullid in our output list,
2132 # Don't start at nullid since we don't want nullid in our output list,
2131 # and if nullid shows up in descendants, empty parents will look like
2133 # and if nullid shows up in descendants, empty parents will look like
2132 # they're descendants.
2134 # they're descendants.
2133 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2135 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2134 n = self.node(r)
2136 n = self.node(r)
2135 isdescendant = False
2137 isdescendant = False
2136 if lowestrev == nullrev: # Everybody is a descendant of nullid
2138 if lowestrev == nullrev: # Everybody is a descendant of nullid
2137 isdescendant = True
2139 isdescendant = True
2138 elif n in descendants:
2140 elif n in descendants:
2139 # n is already a descendant
2141 # n is already a descendant
2140 isdescendant = True
2142 isdescendant = True
2141 # This check only needs to be done here because all the roots
2143 # This check only needs to be done here because all the roots
2142 # will start being marked as descendants before the loop.
2144 # will start being marked as descendants before the loop.
2143 if n in roots:
2145 if n in roots:
2144 # If n was a root, check if it's a 'real' root.
2146 # If n was a root, check if it's a 'real' root.
2145 p = tuple(self.parents(n))
2147 p = tuple(self.parents(n))
2146 # If any of its parents are descendants, it's not a root.
2148 # If any of its parents are descendants, it's not a root.
2147 if (p[0] in descendants) or (p[1] in descendants):
2149 if (p[0] in descendants) or (p[1] in descendants):
2148 roots.remove(n)
2150 roots.remove(n)
2149 else:
2151 else:
2150 p = tuple(self.parents(n))
2152 p = tuple(self.parents(n))
2151 # A node is a descendant if either of its parents is a
2153 # A node is a descendant if either of its parents is a
2152 # descendant. (We seeded the descendants set with the roots
2154 # descendant. (We seeded the descendants set with the roots
2153 # up there, remember?)
2155 # up there, remember?)
2154 if (p[0] in descendants) or (p[1] in descendants):
2156 if (p[0] in descendants) or (p[1] in descendants):
2155 descendants.add(n)
2157 descendants.add(n)
2156 isdescendant = True
2158 isdescendant = True
2157 if isdescendant and ((ancestors is None) or (n in ancestors)):
2159 if isdescendant and ((ancestors is None) or (n in ancestors)):
2158 # Only include nodes that are both descendants and ancestors.
2160 # Only include nodes that are both descendants and ancestors.
2159 orderedout.append(n)
2161 orderedout.append(n)
2160 if (ancestors is not None) and (n in heads):
2162 if (ancestors is not None) and (n in heads):
2161 # We're trying to figure out which heads are reachable
2163 # We're trying to figure out which heads are reachable
2162 # from roots.
2164 # from roots.
2163 # Mark this head as having been reached
2165 # Mark this head as having been reached
2164 heads[n] = True
2166 heads[n] = True
2165 elif ancestors is None:
2167 elif ancestors is None:
2166 # Otherwise, we're trying to discover the heads.
2168 # Otherwise, we're trying to discover the heads.
2167 # Assume this is a head because if it isn't, the next step
2169 # Assume this is a head because if it isn't, the next step
2168 # will eventually remove it.
2170 # will eventually remove it.
2169 heads[n] = True
2171 heads[n] = True
2170 # But, obviously its parents aren't.
2172 # But, obviously its parents aren't.
2171 for p in self.parents(n):
2173 for p in self.parents(n):
2172 heads.pop(p, None)
2174 heads.pop(p, None)
2173 heads = [head for head, flag in heads.items() if flag]
2175 heads = [head for head, flag in heads.items() if flag]
2174 roots = list(roots)
2176 roots = list(roots)
2175 assert orderedout
2177 assert orderedout
2176 assert roots
2178 assert roots
2177 assert heads
2179 assert heads
2178 return (orderedout, roots, heads)
2180 return (orderedout, roots, heads)
2179
2181
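# Editor's sketch (not from the changeset) of the (nodes, outroots, outheads)
# contract above, on a small DAG, using revision numbers instead of node IDs
# for brevity. The parentrevs table and all names are hypothetical.

def toy_nodesbetween(parentrevs, roots, heads):
    def ancestors(revs):
        seen = set()
        stack = [r for r in revs if r >= 0]
        while stack:
            r = stack.pop()
            if r not in seen:
                seen.add(r)
                stack.extend(p for p in parentrevs[r] if p >= 0)
        return seen

    anc = ancestors(heads)
    # descendants of roots, found by a forward scan in topological order
    desc = set(roots)
    for r in range(min(roots), len(parentrevs)):
        if any(p in desc for p in parentrevs[r]):
            desc.add(r)
    nodes = sorted(anc & desc)
    outroots = [r for r in roots if r in nodes]
    outheads = [h for h in heads if h in nodes]
    return nodes, outroots, outheads

parentrevs = [(-1, -1), (0, -1), (1, -1), (1, -1), (3, -1)]
assert toy_nodesbetween(parentrevs, [1], [4]) == ([1, 3, 4], [1], [4])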
2180 def headrevs(self, revs=None):
2182 def headrevs(self, revs=None):
2181 if revs is None:
2183 if revs is None:
2182 try:
2184 try:
2183 return self.index.headrevs()
2185 return self.index.headrevs()
2184 except AttributeError:
2186 except AttributeError:
2185 return self._headrevs()
2187 return self._headrevs()
2186 if rustdagop is not None and self.index.rust_ext_compat:
2188 if rustdagop is not None and self.index.rust_ext_compat:
2187 return rustdagop.headrevs(self.index, revs)
2189 return rustdagop.headrevs(self.index, revs)
2188 return dagop.headrevs(revs, self._uncheckedparentrevs)
2190 return dagop.headrevs(revs, self._uncheckedparentrevs)
2189
2191
2190 def computephases(self, roots):
2192 def computephases(self, roots):
2191 return self.index.computephasesmapsets(roots)
2193 return self.index.computephasesmapsets(roots)
2192
2194
2193 def _headrevs(self):
2195 def _headrevs(self):
2194 count = len(self)
2196 count = len(self)
2195 if not count:
2197 if not count:
2196 return [nullrev]
2198 return [nullrev]
2197 # we won't iterate over filtered revs, so nobody is a head at the start
2199 # we won't iterate over filtered revs, so nobody is a head at the start
2198 ishead = [0] * (count + 1)
2200 ishead = [0] * (count + 1)
2199 index = self.index
2201 index = self.index
2200 for r in self:
2202 for r in self:
2201 ishead[r] = 1 # I may be a head
2203 ishead[r] = 1 # I may be a head
2202 e = index[r]
2204 e = index[r]
2203 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
2205 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
2204 return [r for r, val in enumerate(ishead) if val]
2206 return [r for r, val in enumerate(ishead) if val]
2205
2207
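# Editor's sketch (not from the changeset) of the scan above: every revision
# starts as a candidate head and is cleared as soon as it appears as a
# parent. Hypothetical parentrevs table, -1 for null.

def toy_headrevs(parentrevs):
    if not parentrevs:
        return [-1]
    ishead = [True] * len(parentrevs)
    for r, (p1, p2) in enumerate(parentrevs):
        for p in (p1, p2):
            if p >= 0:
                ishead[p] = False
    return [r for r, flag in enumerate(ishead) if flag]

parentrevs = [(-1, -1), (0, -1), (1, -1), (1, -1)]
assert toy_headrevs(parentrevs) == [2, 3]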
2206 def heads(self, start=None, stop=None):
2208 def heads(self, start=None, stop=None):
2207 """return the list of all nodes that have no children
2209 """return the list of all nodes that have no children
2208
2210
2209 if start is specified, only heads that are descendants of
2211 if start is specified, only heads that are descendants of
2210 start will be returned
2212 start will be returned
2211 if stop is specified, it will consider all the revs from stop
2213 if stop is specified, it will consider all the revs from stop
2212 as if they had no children
2214 as if they had no children
2213 """
2215 """
2214 if start is None and stop is None:
2216 if start is None and stop is None:
2215 if not len(self):
2217 if not len(self):
2216 return [self.nullid]
2218 return [self.nullid]
2217 return [self.node(r) for r in self.headrevs()]
2219 return [self.node(r) for r in self.headrevs()]
2218
2220
2219 if start is None:
2221 if start is None:
2220 start = nullrev
2222 start = nullrev
2221 else:
2223 else:
2222 start = self.rev(start)
2224 start = self.rev(start)
2223
2225
2224 stoprevs = {self.rev(n) for n in stop or []}
2226 stoprevs = {self.rev(n) for n in stop or []}
2225
2227
2226 revs = dagop.headrevssubset(
2228 revs = dagop.headrevssubset(
2227 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2229 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2228 )
2230 )
2229
2231
2230 return [self.node(rev) for rev in revs]
2232 return [self.node(rev) for rev in revs]
2231
2233
2232 def children(self, node):
2234 def children(self, node):
2233 """find the children of a given node"""
2235 """find the children of a given node"""
2234 c = []
2236 c = []
2235 p = self.rev(node)
2237 p = self.rev(node)
2236 for r in self.revs(start=p + 1):
2238 for r in self.revs(start=p + 1):
2237 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2239 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2238 if prevs:
2240 if prevs:
2239 for pr in prevs:
2241 for pr in prevs:
2240 if pr == p:
2242 if pr == p:
2241 c.append(self.node(r))
2243 c.append(self.node(r))
2242 elif p == nullrev:
2244 elif p == nullrev:
2243 c.append(self.node(r))
2245 c.append(self.node(r))
2244 return c
2246 return c
2245
2247
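# Editor's sketch (not from the changeset): children are found by a forward
# scan, which works because a child always has a larger revision number than
# its parents. Toy version over a hypothetical parentrevs table, ignoring
# the nullrev special case handled above.

def toy_children(parentrevs, p):
    return [r for r in range(p + 1, len(parentrevs)) if p in parentrevs[r]]

parentrevs = [(-1, -1), (0, -1), (1, -1), (1, -1)]
assert toy_children(parentrevs, 1) == [2, 3]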
2246 def commonancestorsheads(self, a, b):
2248 def commonancestorsheads(self, a, b):
2247 """calculate all the heads of the common ancestors of nodes a and b"""
2249 """calculate all the heads of the common ancestors of nodes a and b"""
2248 a, b = self.rev(a), self.rev(b)
2250 a, b = self.rev(a), self.rev(b)
2249 ancs = self._commonancestorsheads(a, b)
2251 ancs = self._commonancestorsheads(a, b)
2250 return pycompat.maplist(self.node, ancs)
2252 return pycompat.maplist(self.node, ancs)
2251
2253
2252 def _commonancestorsheads(self, *revs):
2254 def _commonancestorsheads(self, *revs):
2253 """calculate all the heads of the common ancestors of revs"""
2255 """calculate all the heads of the common ancestors of revs"""
2254 try:
2256 try:
2255 ancs = self.index.commonancestorsheads(*revs)
2257 ancs = self.index.commonancestorsheads(*revs)
2256 except (AttributeError, OverflowError): # C implementation failed
2258 except (AttributeError, OverflowError): # C implementation failed
2257 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2259 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2258 return ancs
2260 return ancs
2259
2261
2260 def isancestor(self, a, b):
2262 def isancestor(self, a, b):
2261 """return True if node a is an ancestor of node b
2263 """return True if node a is an ancestor of node b
2262
2264
2263 A revision is considered an ancestor of itself."""
2265 A revision is considered an ancestor of itself."""
2264 a, b = self.rev(a), self.rev(b)
2266 a, b = self.rev(a), self.rev(b)
2265 return self.isancestorrev(a, b)
2267 return self.isancestorrev(a, b)
2266
2268
2267 def isancestorrev(self, a, b):
2269 def isancestorrev(self, a, b):
2268 """return True if revision a is an ancestor of revision b
2270 """return True if revision a is an ancestor of revision b
2269
2271
2270 A revision is considered an ancestor of itself.
2272 A revision is considered an ancestor of itself.
2271
2273
2272 The implementation of this is trivial but the use of
2274 The implementation of this is trivial but the use of
2273 reachableroots is not."""
2275 reachableroots is not."""
2274 if a == nullrev:
2276 if a == nullrev:
2275 return True
2277 return True
2276 elif a == b:
2278 elif a == b:
2277 return True
2279 return True
2278 elif a > b:
2280 elif a > b:
2279 return False
2281 return False
2280 return bool(self.reachableroots(a, [b], [a], includepath=False))
2282 return bool(self.reachableroots(a, [b], [a], includepath=False))
2281
2283
2282 def reachableroots(self, minroot, heads, roots, includepath=False):
2284 def reachableroots(self, minroot, heads, roots, includepath=False):
2283 """return (heads(::(<roots> and <roots>::<heads>)))
2285 """return (heads(::(<roots> and <roots>::<heads>)))
2284
2286
2285 If includepath is True, return (<roots>::<heads>)."""
2287 If includepath is True, return (<roots>::<heads>)."""
2286 try:
2288 try:
2287 return self.index.reachableroots2(
2289 return self.index.reachableroots2(
2288 minroot, heads, roots, includepath
2290 minroot, heads, roots, includepath
2289 )
2291 )
2290 except AttributeError:
2292 except AttributeError:
2291 return dagop._reachablerootspure(
2293 return dagop._reachablerootspure(
2292 self.parentrevs, minroot, roots, heads, includepath
2294 self.parentrevs, minroot, roots, heads, includepath
2293 )
2295 )
2294
2296
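# Editor's sketch (not from the changeset) of how isancestorrev leans on
# reachableroots: `a` is an ancestor of `b` exactly when `a` is reachable by
# walking parents down from `b` without dropping below `a` (the minroot
# cut-off). Hypothetical parentrevs table, -1 for null.

def toy_isancestorrev(parentrevs, a, b):
    if a == -1 or a == b:
        return True
    if a > b:
        return False  # an ancestor always has a smaller revision number
    stack = [b]
    seen = set()
    while stack:
        r = stack.pop()
        if r == a:
            return True
        if r in seen or r < a:
            continue  # minroot cut-off: nothing below `a` can matter
        seen.add(r)
        stack.extend(p for p in parentrevs[r] if p >= 0)
    return False

parentrevs = [(-1, -1), (0, -1), (1, -1), (1, -1)]
assert toy_isancestorrev(parentrevs, 1, 3)
assert not toy_isancestorrev(parentrevs, 2, 3)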
2295 def ancestor(self, a, b):
2297 def ancestor(self, a, b):
2296 """calculate the "best" common ancestor of nodes a and b"""
2298 """calculate the "best" common ancestor of nodes a and b"""
2297
2299
2298 a, b = self.rev(a), self.rev(b)
2300 a, b = self.rev(a), self.rev(b)
2299 try:
2301 try:
2300 ancs = self.index.ancestors(a, b)
2302 ancs = self.index.ancestors(a, b)
2301 except (AttributeError, OverflowError):
2303 except (AttributeError, OverflowError):
2302 ancs = ancestor.ancestors(self.parentrevs, a, b)
2304 ancs = ancestor.ancestors(self.parentrevs, a, b)
2303 if ancs:
2305 if ancs:
2304 # choose a consistent winner when there's a tie
2306 # choose a consistent winner when there's a tie
2305 return min(map(self.node, ancs))
2307 return min(map(self.node, ancs))
2306 return self.nullid
2308 return self.nullid
2307
2309
2308 def _match(self, id):
2310 def _match(self, id):
2309 if isinstance(id, int):
2311 if isinstance(id, int):
2310 # rev
2312 # rev
2311 return self.node(id)
2313 return self.node(id)
2312 if len(id) == self.nodeconstants.nodelen:
2314 if len(id) == self.nodeconstants.nodelen:
2313 # possibly a binary node
2315 # possibly a binary node
2314 # odds of a binary node being all hex in ASCII are 1 in 10**25
2316 # odds of a binary node being all hex in ASCII are 1 in 10**25
2315 try:
2317 try:
2316 node = id
2318 node = id
2317 self.rev(node) # quick search the index
2319 self.rev(node) # quick search the index
2318 return node
2320 return node
2319 except error.LookupError:
2321 except error.LookupError:
2320 pass # may be partial hex id
2322 pass # may be partial hex id
2321 try:
2323 try:
2322 # str(rev)
2324 # str(rev)
2323 rev = int(id)
2325 rev = int(id)
2324 if b"%d" % rev != id:
2326 if b"%d" % rev != id:
2325 raise ValueError
2327 raise ValueError
2326 if rev < 0:
2328 if rev < 0:
2327 rev = len(self) + rev
2329 rev = len(self) + rev
2328 if rev < 0 or rev >= len(self):
2330 if rev < 0 or rev >= len(self):
2329 raise ValueError
2331 raise ValueError
2330 return self.node(rev)
2332 return self.node(rev)
2331 except (ValueError, OverflowError):
2333 except (ValueError, OverflowError):
2332 pass
2334 pass
2333 if len(id) == 2 * self.nodeconstants.nodelen:
2335 if len(id) == 2 * self.nodeconstants.nodelen:
2334 try:
2336 try:
2335 # a full hex nodeid?
2337 # a full hex nodeid?
2336 node = bin(id)
2338 node = bin(id)
2337 self.rev(node)
2339 self.rev(node)
2338 return node
2340 return node
2339 except (binascii.Error, error.LookupError):
2341 except (binascii.Error, error.LookupError):
2340 pass
2342 pass
2341
2343
2342 def _partialmatch(self, id):
2344 def _partialmatch(self, id):
2343 # we don't care about wdirfilenodeids as they should always be full hashes
2345 # we don't care about wdirfilenodeids as they should always be full hashes
2344 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2346 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2345 ambiguous = False
2347 ambiguous = False
2346 try:
2348 try:
2347 partial = self.index.partialmatch(id)
2349 partial = self.index.partialmatch(id)
2348 if partial and self.hasnode(partial):
2350 if partial and self.hasnode(partial):
2349 if maybewdir:
2351 if maybewdir:
2350 # single 'ff...' match in radix tree, ambiguous with wdir
2352 # single 'ff...' match in radix tree, ambiguous with wdir
2351 ambiguous = True
2353 ambiguous = True
2352 else:
2354 else:
2353 return partial
2355 return partial
2354 elif maybewdir:
2356 elif maybewdir:
2355 # no 'ff...' match in radix tree, wdir identified
2357 # no 'ff...' match in radix tree, wdir identified
2356 raise error.WdirUnsupported
2358 raise error.WdirUnsupported
2357 else:
2359 else:
2358 return None
2360 return None
2359 except error.RevlogError:
2361 except error.RevlogError:
2360 # parsers.c radix tree lookup gave multiple matches
2362 # parsers.c radix tree lookup gave multiple matches
2361 # fast path: for unfiltered changelog, radix tree is accurate
2363 # fast path: for unfiltered changelog, radix tree is accurate
2362 if not getattr(self, 'filteredrevs', None):
2364 if not getattr(self, 'filteredrevs', None):
2363 ambiguous = True
2365 ambiguous = True
2364 # fall through to slow path that filters hidden revisions
2366 # fall through to slow path that filters hidden revisions
2365 except (AttributeError, ValueError):
2367 except (AttributeError, ValueError):
2366 # we are pure python, or key is not hex
2368 # we are pure python, or key is not hex
2367 pass
2369 pass
2368 if ambiguous:
2370 if ambiguous:
2369 raise error.AmbiguousPrefixLookupError(
2371 raise error.AmbiguousPrefixLookupError(
2370 id, self.display_id, _(b'ambiguous identifier')
2372 id, self.display_id, _(b'ambiguous identifier')
2371 )
2373 )
2372
2374
2373 if id in self._pcache:
2375 if id in self._pcache:
2374 return self._pcache[id]
2376 return self._pcache[id]
2375
2377
2376 if len(id) <= 40:
2378 if len(id) <= 40:
2377 # hex(node)[:...]
2379 # hex(node)[:...]
2378 l = len(id) // 2 * 2 # grab an even number of digits
2380 l = len(id) // 2 * 2 # grab an even number of digits
2379 try:
2381 try:
2380 # we're dropping the last digit, so let's check that it's hex,
2382 # we're dropping the last digit, so let's check that it's hex,
2381 # to avoid the expensive computation below if it's not
2383 # to avoid the expensive computation below if it's not
2382 if len(id) % 2 > 0:
2384 if len(id) % 2 > 0:
2383 if not (id[-1] in hexdigits):
2385 if not (id[-1] in hexdigits):
2384 return None
2386 return None
2385 prefix = bin(id[:l])
2387 prefix = bin(id[:l])
2386 except binascii.Error:
2388 except binascii.Error:
2387 pass
2389 pass
2388 else:
2390 else:
2389 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2391 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2390 nl = [
2392 nl = [
2391 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2393 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2392 ]
2394 ]
2393 if self.nodeconstants.nullhex.startswith(id):
2395 if self.nodeconstants.nullhex.startswith(id):
2394 nl.append(self.nullid)
2396 nl.append(self.nullid)
2395 if len(nl) > 0:
2397 if len(nl) > 0:
2396 if len(nl) == 1 and not maybewdir:
2398 if len(nl) == 1 and not maybewdir:
2397 self._pcache[id] = nl[0]
2399 self._pcache[id] = nl[0]
2398 return nl[0]
2400 return nl[0]
2399 raise error.AmbiguousPrefixLookupError(
2401 raise error.AmbiguousPrefixLookupError(
2400 id, self.display_id, _(b'ambiguous identifier')
2402 id, self.display_id, _(b'ambiguous identifier')
2401 )
2403 )
2402 if maybewdir:
2404 if maybewdir:
2403 raise error.WdirUnsupported
2405 raise error.WdirUnsupported
2404 return None
2406 return None
2405
2407
2406 def lookup(self, id):
2408 def lookup(self, id):
2407 """locate a node based on:
2409 """locate a node based on:
2408 - revision number or str(revision number)
2410 - revision number or str(revision number)
2409 - nodeid or subset of hex nodeid
2411 - nodeid or subset of hex nodeid
2410 """
2412 """
2411 n = self._match(id)
2413 n = self._match(id)
2412 if n is not None:
2414 if n is not None:
2413 return n
2415 return n
2414 n = self._partialmatch(id)
2416 n = self._partialmatch(id)
2415 if n:
2417 if n:
2416 return n
2418 return n
2417
2419
2418 raise error.LookupError(id, self.display_id, _(b'no match found'))
2420 raise error.LookupError(id, self.display_id, _(b'no match found'))
2419
2421
2420 def shortest(self, node, minlength=1):
2422 def shortest(self, node, minlength=1):
2421 """Find the shortest unambiguous prefix that matches node."""
2423 """Find the shortest unambiguous prefix that matches node."""
2422
2424
2423 def isvalid(prefix):
2425 def isvalid(prefix):
2424 try:
2426 try:
2425 matchednode = self._partialmatch(prefix)
2427 matchednode = self._partialmatch(prefix)
2426 except error.AmbiguousPrefixLookupError:
2428 except error.AmbiguousPrefixLookupError:
2427 return False
2429 return False
2428 except error.WdirUnsupported:
2430 except error.WdirUnsupported:
2429 # single 'ff...' match
2431 # single 'ff...' match
2430 return True
2432 return True
2431 if matchednode is None:
2433 if matchednode is None:
2432 raise error.LookupError(node, self.display_id, _(b'no node'))
2434 raise error.LookupError(node, self.display_id, _(b'no node'))
2433 return True
2435 return True
2434
2436
2435 def maybewdir(prefix):
2437 def maybewdir(prefix):
2436 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2438 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2437
2439
2438 hexnode = hex(node)
2440 hexnode = hex(node)
2439
2441
2440 def disambiguate(hexnode, minlength):
2442 def disambiguate(hexnode, minlength):
2441 """Disambiguate against wdirid."""
2443 """Disambiguate against wdirid."""
2442 for length in range(minlength, len(hexnode) + 1):
2444 for length in range(minlength, len(hexnode) + 1):
2443 prefix = hexnode[:length]
2445 prefix = hexnode[:length]
2444 if not maybewdir(prefix):
2446 if not maybewdir(prefix):
2445 return prefix
2447 return prefix
2446
2448
2447 if not getattr(self, 'filteredrevs', None):
2449 if not getattr(self, 'filteredrevs', None):
2448 try:
2450 try:
2449 length = max(self.index.shortest(node), minlength)
2451 length = max(self.index.shortest(node), minlength)
2450 return disambiguate(hexnode, length)
2452 return disambiguate(hexnode, length)
2451 except error.RevlogError:
2453 except error.RevlogError:
2452 if node != self.nodeconstants.wdirid:
2454 if node != self.nodeconstants.wdirid:
2453 raise error.LookupError(
2455 raise error.LookupError(
2454 node, self.display_id, _(b'no node')
2456 node, self.display_id, _(b'no node')
2455 )
2457 )
2456 except AttributeError:
2458 except AttributeError:
2457 # Fall through to pure code
2459 # Fall through to pure code
2458 pass
2460 pass
2459
2461
2460 if node == self.nodeconstants.wdirid:
2462 if node == self.nodeconstants.wdirid:
2461 for length in range(minlength, len(hexnode) + 1):
2463 for length in range(minlength, len(hexnode) + 1):
2462 prefix = hexnode[:length]
2464 prefix = hexnode[:length]
2463 if isvalid(prefix):
2465 if isvalid(prefix):
2464 return prefix
2466 return prefix
2465
2467
2466 for length in range(minlength, len(hexnode) + 1):
2468 for length in range(minlength, len(hexnode) + 1):
2467 prefix = hexnode[:length]
2469 prefix = hexnode[:length]
2468 if isvalid(prefix):
2470 if isvalid(prefix):
2469 return disambiguate(hexnode, length)
2471 return disambiguate(hexnode, length)
2470
2472
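# Editor's sketch (not from the changeset) of the prefix search above, over
# a plain list of hex node IDs: grow the prefix until it matches exactly one
# node, then make sure it cannot be mistaken for the all-'f' working
# directory ID. All names are hypothetical.

def toy_shortest(hexnodes, hexnode, minlength=1):
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        matches = [n for n in hexnodes if n.startswith(prefix)]
        maybewdir = all(c == 'f' for c in prefix)
        if len(matches) == 1 and not maybewdir:
            return prefix
    return hexnode

nodes = ['a1b2', 'a1c3', 'ff09']
assert toy_shortest(nodes, 'a1c3') == 'a1c'
assert toy_shortest(nodes, 'ff09') == 'ff0'  # 'f'/'ff' would look like wdir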
2471 def cmp(self, node, text):
2473 def cmp(self, node, text):
2472 """compare text with a given file revision
2474 """compare text with a given file revision
2473
2475
2474 returns True if text is different from what is stored.
2476 returns True if text is different from what is stored.
2475 """
2477 """
2476 p1, p2 = self.parents(node)
2478 p1, p2 = self.parents(node)
2477 return storageutil.hashrevisionsha1(text, p1, p2) != node
2479 return storageutil.hashrevisionsha1(text, p1, p2) != node
2478
2480
2479 def deltaparent(self, rev):
2481 def deltaparent(self, rev):
2480 """return deltaparent of the given revision"""
2482 """return deltaparent of the given revision"""
2481 base = self.index[rev][3]
2483 base = self.index[rev][3]
2482 if base == rev:
2484 if base == rev:
2483 return nullrev
2485 return nullrev
2484 elif self.delta_config.general_delta:
2486 elif self.delta_config.general_delta:
2485 return base
2487 return base
2486 else:
2488 else:
2487 return rev - 1
2489 return rev - 1
2488
2490
2489 def issnapshot(self, rev):
2491 def issnapshot(self, rev):
2490 """tells whether rev is a snapshot"""
2492 """tells whether rev is a snapshot"""
2491 ret = self._inner.issnapshot(rev)
2493 ret = self._inner.issnapshot(rev)
2492 self.issnapshot = self._inner.issnapshot
2494 self.issnapshot = self._inner.issnapshot
2493 return ret
2495 return ret
2494
2496
2495 def snapshotdepth(self, rev):
2497 def snapshotdepth(self, rev):
2496 """number of snapshot in the chain before this one"""
2498 """number of snapshot in the chain before this one"""
2497 if not self.issnapshot(rev):
2499 if not self.issnapshot(rev):
2498 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2500 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2499 return len(self._deltachain(rev)[0]) - 1
2501 return len(self._inner._deltachain(rev)[0]) - 1
2500
2502
2501 def revdiff(self, rev1, rev2):
2503 def revdiff(self, rev1, rev2):
2502 """return or calculate a delta between two revisions
2504 """return or calculate a delta between two revisions
2503
2505
2504 The delta calculated is in binary form and is intended to be written to
2506 The delta calculated is in binary form and is intended to be written to
2505 revlog data directly. So this function needs raw revision data.
2507 revlog data directly. So this function needs raw revision data.
2506 """
2508 """
2507 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2509 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2508 return bytes(self._inner._chunk(rev2))
2510 return bytes(self._inner._chunk(rev2))
2509
2511
2510 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2512 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2511
2513
2512 def revision(self, nodeorrev):
2514 def revision(self, nodeorrev):
2513 """return an uncompressed revision of a given node or revision
2515 """return an uncompressed revision of a given node or revision
2514 number.
2516 number.
2515 """
2517 """
2516 return self._revisiondata(nodeorrev)
2518 return self._revisiondata(nodeorrev)
2517
2519
2518 def sidedata(self, nodeorrev):
2520 def sidedata(self, nodeorrev):
2519 """a map of extra data related to the changeset but not part of the hash
2521 """a map of extra data related to the changeset but not part of the hash
2520
2522
2521 This function currently returns a dictionary. However, a more advanced
2523 This function currently returns a dictionary. However, a more advanced
2522 mapping object will likely be used in the future for more
2524 mapping object will likely be used in the future for more
2523 efficient/lazy code.
2525 efficient/lazy code.
2524 """
2526 """
2525 # deal with <nodeorrev> argument type
2527 # deal with <nodeorrev> argument type
2526 if isinstance(nodeorrev, int):
2528 if isinstance(nodeorrev, int):
2527 rev = nodeorrev
2529 rev = nodeorrev
2528 else:
2530 else:
2529 rev = self.rev(nodeorrev)
2531 rev = self.rev(nodeorrev)
2530 return self._sidedata(rev)
2532 return self._sidedata(rev)
2531
2533
2532 def _revisiondata(self, nodeorrev, raw=False):
2534 def _revisiondata(self, nodeorrev, raw=False):
2533 # deal with <nodeorrev> argument type
2535 # deal with <nodeorrev> argument type
2534 if isinstance(nodeorrev, int):
2536 if isinstance(nodeorrev, int):
2535 rev = nodeorrev
2537 rev = nodeorrev
2536 node = self.node(rev)
2538 node = self.node(rev)
2537 else:
2539 else:
2538 node = nodeorrev
2540 node = nodeorrev
2539 rev = None
2541 rev = None
2540
2542
2541 # fast path the special `nullid` rev
2543 # fast path the special `nullid` rev
2542 if node == self.nullid:
2544 if node == self.nullid:
2543 return b""
2545 return b""
2544
2546
2545 # ``rawtext`` is the text as stored inside the revlog. Might be the
2547 # ``rawtext`` is the text as stored inside the revlog. Might be the
2546 # revision or might need to be processed to retrieve the revision.
2548 # revision or might need to be processed to retrieve the revision.
2547 rev, rawtext, validated = self._rawtext(node, rev)
2549 rev, rawtext, validated = self._rawtext(node, rev)
2548
2550
2549 if raw and validated:
2551 if raw and validated:
2550 # if we don't want to process the raw text and the raw
2552 # if we don't want to process the raw text and the raw
2551 # text is already cached, we can exit early.
2553 # text is already cached, we can exit early.
2552 return rawtext
2554 return rawtext
2553 if rev is None:
2555 if rev is None:
2554 rev = self.rev(node)
2556 rev = self.rev(node)
2555 # the revlog's flags for this revision
2557 # the revlog's flags for this revision
2556 # (they usually alter its state or content)
2558 # (they usually alter its state or content)
2557 flags = self.flags(rev)
2559 flags = self.flags(rev)
2558
2560
2559 if validated and flags == REVIDX_DEFAULT_FLAGS:
2561 if validated and flags == REVIDX_DEFAULT_FLAGS:
2560 # no extra flags set, no flag processor runs, text = rawtext
2562 # no extra flags set, no flag processor runs, text = rawtext
2561 return rawtext
2563 return rawtext
2562
2564
2563 if raw:
2565 if raw:
2564 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2566 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2565 text = rawtext
2567 text = rawtext
2566 else:
2568 else:
2567 r = flagutil.processflagsread(self, rawtext, flags)
2569 r = flagutil.processflagsread(self, rawtext, flags)
2568 text, validatehash = r
2570 text, validatehash = r
2569 if validatehash:
2571 if validatehash:
2570 self.checkhash(text, node, rev=rev)
2572 self.checkhash(text, node, rev=rev)
2571 if not validated:
2573 if not validated:
2572 self._revisioncache = (node, rev, rawtext)
2574 self._revisioncache = (node, rev, rawtext)
2573
2575
2574 return text
2576 return text
2575
2577
2576 def _rawtext(self, node, rev):
2578 def _rawtext(self, node, rev):
2577 """return the possibly unvalidated rawtext for a revision
2579 """return the possibly unvalidated rawtext for a revision
2578
2580
2579 returns (rev, rawtext, validated)
2581 returns (rev, rawtext, validated)
2580 """
2582 """
2581
2583
2582 # revision in the cache (could be useful to apply delta)
2584 # revision in the cache (could be useful to apply delta)
2583 cachedrev = None
2585 cachedrev = None
2584 # An intermediate text to apply deltas to
2586 # An intermediate text to apply deltas to
2585 basetext = None
2587 basetext = None
2586
2588
2587 # Check if we have the entry in cache
2589 # Check if we have the entry in cache
2588 # The cache entry looks like (node, rev, rawtext)
2590 # The cache entry looks like (node, rev, rawtext)
2589 if self._revisioncache:
2591 if self._revisioncache:
2590 if self._revisioncache[0] == node:
2592 if self._revisioncache[0] == node:
2591 return (rev, self._revisioncache[2], True)
2593 return (rev, self._revisioncache[2], True)
2592 cachedrev = self._revisioncache[1]
2594 cachedrev = self._revisioncache[1]
2593
2595
2594 if rev is None:
2596 if rev is None:
2595 rev = self.rev(node)
2597 rev = self.rev(node)
2596
2598
2597 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2599 chain, stopped = self._inner._deltachain(rev, stoprev=cachedrev)
2598 if stopped:
2600 if stopped:
2599 basetext = self._revisioncache[2]
2601 basetext = self._revisioncache[2]
2600
2602
2601 # drop the cache to save memory; the caller is expected to
2603 # drop the cache to save memory; the caller is expected to
2602 # update self._revisioncache after validating the text
2604 # update self._revisioncache after validating the text
2603 self._revisioncache = None
2605 self._revisioncache = None
2604
2606
2605 targetsize = None
2607 targetsize = None
2606 rawsize = self.index[rev][2]
2608 rawsize = self.index[rev][2]
2607 if 0 <= rawsize:
2609 if 0 <= rawsize:
2608 targetsize = 4 * rawsize
2610 targetsize = 4 * rawsize
2609
2611
2610 bins = self._inner._chunks(chain, targetsize=targetsize)
2612 bins = self._inner._chunks(chain, targetsize=targetsize)
2611 if basetext is None:
2613 if basetext is None:
2612 basetext = bytes(bins[0])
2614 basetext = bytes(bins[0])
2613 bins = bins[1:]
2615 bins = bins[1:]
2614
2616
2615 rawtext = mdiff.patches(basetext, bins)
2617 rawtext = mdiff.patches(basetext, bins)
2616 del basetext # let us have a chance to free memory early
2618 del basetext # let us have a chance to free memory early
2617 return (rev, rawtext, False)
2619 return (rev, rawtext, False)
2618
2620
2619 def _sidedata(self, rev):
2621 def _sidedata(self, rev):
2620 """Return the sidedata for a given revision number."""
2622 """Return the sidedata for a given revision number."""
2621 index_entry = self.index[rev]
2623 index_entry = self.index[rev]
2622 sidedata_offset = index_entry[8]
2624 sidedata_offset = index_entry[8]
2623 sidedata_size = index_entry[9]
2625 sidedata_size = index_entry[9]
2624
2626
2625 if self._inline:
2627 if self._inline:
2626 sidedata_offset += self.index.entry_size * (1 + rev)
2628 sidedata_offset += self.index.entry_size * (1 + rev)
2627 if sidedata_size == 0:
2629 if sidedata_size == 0:
2628 return {}
2630 return {}
2629
2631
2630 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2632 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2631 filename = self._sidedatafile
2633 filename = self._sidedatafile
2632 end = self._docket.sidedata_end
2634 end = self._docket.sidedata_end
2633 offset = sidedata_offset
2635 offset = sidedata_offset
2634 length = sidedata_size
2636 length = sidedata_size
2635 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2637 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2636 raise error.RevlogError(m)
2638 raise error.RevlogError(m)
2637
2639
2638 comp_segment = self._inner._segmentfile_sidedata.read_chunk(
2640 comp_segment = self._inner._segmentfile_sidedata.read_chunk(
2639 sidedata_offset, sidedata_size
2641 sidedata_offset, sidedata_size
2640 )
2642 )
2641
2643
2642 comp = self.index[rev][11]
2644 comp = self.index[rev][11]
2643 if comp == COMP_MODE_PLAIN:
2645 if comp == COMP_MODE_PLAIN:
2644 segment = comp_segment
2646 segment = comp_segment
2645 elif comp == COMP_MODE_DEFAULT:
2647 elif comp == COMP_MODE_DEFAULT:
2646 segment = self._inner._decompressor(comp_segment)
2648 segment = self._inner._decompressor(comp_segment)
2647 elif comp == COMP_MODE_INLINE:
2649 elif comp == COMP_MODE_INLINE:
2648 segment = self._inner.decompress(comp_segment)
2650 segment = self._inner.decompress(comp_segment)
2649 else:
2651 else:
2650 msg = b'unknown compression mode %d'
2652 msg = b'unknown compression mode %d'
2651 msg %= comp
2653 msg %= comp
2652 raise error.RevlogError(msg)
2654 raise error.RevlogError(msg)
2653
2655
2654 sidedata = sidedatautil.deserialize_sidedata(segment)
2656 sidedata = sidedatautil.deserialize_sidedata(segment)
2655 return sidedata
2657 return sidedata
2656
2658
2657 def rawdata(self, nodeorrev):
2659 def rawdata(self, nodeorrev):
2658 """return an uncompressed raw data of a given node or revision number."""
2660 """return an uncompressed raw data of a given node or revision number."""
2659 return self._revisiondata(nodeorrev, raw=True)
2661 return self._revisiondata(nodeorrev, raw=True)
2660
2662
2661 def hash(self, text, p1, p2):
2663 def hash(self, text, p1, p2):
2662 """Compute a node hash.
2664 """Compute a node hash.
2663
2665
2664 Available as a function so that subclasses can replace the hash
2666 Available as a function so that subclasses can replace the hash
2665 as needed.
2667 as needed.
2666 """
2668 """
2667 return storageutil.hashrevisionsha1(text, p1, p2)
2669 return storageutil.hashrevisionsha1(text, p1, p2)
2668
2670
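# Editor's sketch (not from the changeset) of the classic revlog SHA-1 node
# computation that hashrevisionsha1 performs: hash the two parent nodes in
# sorted order, then the revision text. The toy_* names are hypothetical.
import hashlib

def toy_hashrevision(text, p1, p2):
    a, b = sorted((p1, p2))
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()

nullid = b'\0' * 20
node = toy_hashrevision(b'file content\n', nullid, nullid)
assert len(node) == 20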
2669 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2671 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2670 """Check node hash integrity.
2672 """Check node hash integrity.
2671
2673
2672 Available as a function so that subclasses can extend hash mismatch
2674 Available as a function so that subclasses can extend hash mismatch
2673 behaviors as needed.
2675 behaviors as needed.
2674 """
2676 """
2675 try:
2677 try:
2676 if p1 is None and p2 is None:
2678 if p1 is None and p2 is None:
2677 p1, p2 = self.parents(node)
2679 p1, p2 = self.parents(node)
2678 if node != self.hash(text, p1, p2):
2680 if node != self.hash(text, p1, p2):
2679 # Clear the revision cache on hash failure. The revision cache
2681 # Clear the revision cache on hash failure. The revision cache
2680 # only stores the raw revision and clearing the cache does have
2682 # only stores the raw revision and clearing the cache does have
2681 # the side-effect that we won't have a cache hit when the raw
2683 # the side-effect that we won't have a cache hit when the raw
2682 # revision data is accessed. But this case should be rare and
2684 # revision data is accessed. But this case should be rare and
2683 # it is extra work to teach the cache about the hash
2685 # it is extra work to teach the cache about the hash
2684 # verification state.
2686 # verification state.
2685 if self._revisioncache and self._revisioncache[0] == node:
2687 if self._revisioncache and self._revisioncache[0] == node:
2686 self._revisioncache = None
2688 self._revisioncache = None
2687
2689
2688 revornode = rev
2690 revornode = rev
2689 if revornode is None:
2691 if revornode is None:
2690 revornode = templatefilters.short(hex(node))
2692 revornode = templatefilters.short(hex(node))
2691 raise error.RevlogError(
2693 raise error.RevlogError(
2692 _(b"integrity check failed on %s:%s")
2694 _(b"integrity check failed on %s:%s")
2693 % (self.display_id, pycompat.bytestr(revornode))
2695 % (self.display_id, pycompat.bytestr(revornode))
2694 )
2696 )
2695 except error.RevlogError:
2697 except error.RevlogError:
2696 if self.feature_config.censorable and storageutil.iscensoredtext(
2698 if self.feature_config.censorable and storageutil.iscensoredtext(
2697 text
2699 text
2698 ):
2700 ):
2699 raise error.CensoredNodeError(self.display_id, node, text)
2701 raise error.CensoredNodeError(self.display_id, node, text)
2700 raise
2702 raise
2701
2703
2702 @property
2704 @property
2703 def _split_index_file(self):
2705 def _split_index_file(self):
2704 """the path where to expect the index of an ongoing splitting operation
2706 """the path where to expect the index of an ongoing splitting operation
2705
2707
2706 The file will only exist if a splitting operation is in progress, but
2708 The file will only exist if a splitting operation is in progress, but
2707 it is always expected at the same location."""
2709 it is always expected at the same location."""
2708 parts = self.radix.split(b'/')
2710 parts = self.radix.split(b'/')
2709 if len(parts) > 1:
2711 if len(parts) > 1:
2710 # adds a '-s' suffix to the `data/` or `meta/` base
2712 # adds a '-s' suffix to the `data/` or `meta/` base
2711 head = parts[0] + b'-s'
2713 head = parts[0] + b'-s'
2712 mids = parts[1:-1]
2714 mids = parts[1:-1]
2713 tail = parts[-1] + b'.i'
2715 tail = parts[-1] + b'.i'
2714 pieces = [head] + mids + [tail]
2716 pieces = [head] + mids + [tail]
2715 return b'/'.join(pieces)
2717 return b'/'.join(pieces)
2716 else:
2718 else:
2717 # the revlog is stored at the root of the store (changelog or
2719 # the revlog is stored at the root of the store (changelog or
2718 # manifest), no risk of collision.
2720 # manifest), no risk of collision.
2719 return self.radix + b'.i.s'
2721 return self.radix + b'.i.s'
2720
2722
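# Editor's sketch (not from the changeset) of the path computation above:
# the '-s' marker lands on the 'data'/'meta' component, while a revlog at
# the store root simply gets '.i.s' appended (no collision risk, per the
# original comment). The toy_* name is hypothetical.

def toy_split_index_file(radix):
    parts = radix.split(b'/')
    if len(parts) > 1:
        head = parts[0] + b'-s'
        return b'/'.join([head] + parts[1:-1] + [parts[-1] + b'.i'])
    return radix + b'.i.s'

assert toy_split_index_file(b'data/some/dir/file') == b'data-s/some/dir/file.i'
assert toy_split_index_file(b'00changelog') == b'00changelog.i.s'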
    def _enforceinlinesize(self, tr, side_write=True):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        if self._docket is not None:
            msg = b"inline revlog should not have a docket"
            raise error.ProgrammingError(msg)

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        if troffset:
            tr.addbackup(self._indexfile, for_offset=True)
        tr.add(self._datafile, 0)

        new_index_file_path = None
        if side_write:
            old_index_file_path = self._indexfile
            new_index_file_path = self._split_index_file
            opener = self.opener
            weak_self = weakref.ref(self)

            # the "split" index replaces the real index when the transaction
            # is finalized
            def finalize_callback(tr):
                opener.rename(
                    new_index_file_path,
                    old_index_file_path,
                    checkambig=True,
                )
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path
                    maybe_self._inner.index_file = maybe_self._indexfile

            def abort_callback(tr):
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path
                    maybe_self._inner.inline = True
                    maybe_self._inner.index_file = old_index_file_path

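            # note: the b'000-' prefix below is presumably meant to make these
            # callbacks sort (and therefore run) ahead of other transaction
            # callbacks.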
            tr.registertmp(new_index_file_path)
            if self.target[1] is not None:
                callback_id = b'000-revlog-split-%d-%s' % self.target
            else:
                callback_id = b'000-revlog-split-%d' % self.target[0]
            tr.addfinalize(callback_id, finalize_callback)
            tr.addabort(callback_id, abort_callback)

        self._format_flags &= ~FLAG_INLINE_DATA
        self._inner.split_inline(
            tr,
            self._format_flags | self._format_version,
            new_index_file_path=new_index_file_path,
        )

        self._inline = False
        if new_index_file_path is not None:
            self._indexfile = new_index_file_path

        nodemaputil.setup_persistent_nodemap(tr, self)

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        """context manager for read access, entering the inner object's own
        reading context for the duration of the block."""
        with self._inner.reading():
            yield

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'trying to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._inner.is_writing:
            # already inside a writing context: nothing to set up
            yield
        else:
            data_end = None
            sidedata_end = None
            if self._docket is not None:
                data_end = self._docket.data_end
                sidedata_end = self._docket.sidedata_end
            with self._inner.writing(
                transaction,
                data_end=data_end,
                sidedata_end=sidedata_end,
            ):
                yield
                if self._docket is not None:
                    self._write_docket(transaction)

    def _write_docket(self, transaction):
        """write the current docket to disk

        Exists as a method to help the changelog implement its transaction
        logic.

        We could also imagine using the same transaction logic for all
        revlogs, since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses
            might use a different hashing method (and override checkhash() in
            that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.feature_config.has_side_data:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents

        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """compress ``data`` (delegated to the inner revlog object)"""
        return self._inner.compress(data)

    def decompress(self, data):
        """decompress a revlog chunk (delegated to the inner revlog object)"""
        return self._inner.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._inner._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._inner._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add
                # on the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
            self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # we need the rawtext size before it was changed by flag
            # processors, which is the non-raw size. use revlog explicitly to
            # avoid filelog's extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self.delta_config.debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

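        # `cachedelta` may arrive as a (base rev, delta) pair or as a
        # (base rev, delta, reuse policy) triple; the block below normalizes
        # the short form by attaching the configured default reuse policy.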
        if cachedelta is not None and len(cachedelta) == 2:
            # If the cached delta has no information about how it should be
            # reused, add the default reuse instruction according to the
            # revlog's configuration.
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse = DELTA_BASE_REUSE_NO
            cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

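        # The checks below only adopt the compressed sidedata when the
        # compressor returned no explicit header (`h` is empty rather than the
        # b'u' "stored uncompressed" marker), the result does not start with a
        # NUL byte, and it is actually smaller than the serialized form.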
        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.feature_config.has_side_data:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self._inner.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty; that way we can
            # easily detect empty sidedata, and it will be no different from
            # sidedata we add manually.
            sidedata_offset = 0

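        # As implemented below, the rank of a revision is the size of its set
        # of ancestors, itself included: a root gets rank 1, a non-merge gets
        # one more than its parent, and a merge combines the ancestors of both
        # parents without double counting (for illustration, the tip of a
        # linear five-revision chain has rank 5).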
        rank = RANK_UNKNOWN
        if self.feature_config.compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.

        Versions < 2 of the revlog can get this in O(1), while revlog v2 needs
        a docket file to store that information: since sidedata can be
        rewritten to the end of the data file within a transaction, you can
        have cases where, for example, rev `n` does not have sidedata while
        rev `n - 1` does, leading to `n - 1`'s sidedata being written after
        `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._inner._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._inner._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

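        # Non-inline revlogs write the entry to the index file and the data
        # (and sidedata) to their own files; inline revlogs interleave index
        # entries and revision data in the .i file, which is why the inline
        # branch below shifts `offset` by the size of all preceding entries.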
        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._inner._writinghandles[0]
            wh2 = self._inner._writinghandles[1]
            wh3 = self._inner._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first delta
        is against its parent, which should be in our log; the rest are
        against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self.delta_config.debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement
                        # in a single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to
                    # retrieve the added revision, which will require a call
                    # to revision(). revision() will fast path if there is a
                    # cache hit. So, we tell _addrevision() to always cache in
                    # this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as
                    # raw by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self.feature_config.censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self.feature_config.censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment.
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._inner._segmentfile.clear_cache()
        self._inner._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
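        # For illustration: an index file truncated mid-entry shows up as
        # di > 0, while stray bytes past the expected end of the data file
        # show up as dd > 0.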
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

    def files(self):
        """return list of files that compose this revlog"""
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        """produce revision delta objects for the given nodes, delegating the
        heavy lifting to ``storageutil.emitrevisions``"""
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

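        # Without general delta, deltas are stored against the previous
        # revision in storage order, so that order is presumably the only one
        # that lets the stored deltas be reused cheaply.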
        if nodesorder is None and not self.delta_config.general_delta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self._candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

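    # A minimal usage sketch (hypothetical `src`, `dest` and transaction
    # names; assumes `dest` is an empty revlog opened with the desired
    # target format):
    #
    #     with repo.transaction(b'clone') as tr:
    #         src.clone(tr, dest, deltareuse=src.DELTAREUSESAMEREVS)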
3546 def clone(
3548 def clone(
3547 self,
3549 self,
3548 tr,
3550 tr,
3549 destrevlog,
3551 destrevlog,
3550 addrevisioncb=None,
3552 addrevisioncb=None,
3551 deltareuse=DELTAREUSESAMEREVS,
3553 deltareuse=DELTAREUSESAMEREVS,
3552 forcedeltabothparents=None,
3554 forcedeltabothparents=None,
3553 sidedata_helpers=None,
3555 sidedata_helpers=None,
3554 ):
3556 ):
3555 """Copy this revlog to another, possibly with format changes.
3557 """Copy this revlog to another, possibly with format changes.
3556
3558
3557 The destination revlog will contain the same revisions and nodes.
3559 The destination revlog will contain the same revisions and nodes.
3558 However, it may not be bit-for-bit identical due to e.g. delta encoding
3560 However, it may not be bit-for-bit identical due to e.g. delta encoding
3559 differences.
3561 differences.
3560
3562
3561 The ``deltareuse`` argument control how deltas from the existing revlog
3563 The ``deltareuse`` argument control how deltas from the existing revlog
3562 are preserved in the destination revlog. The argument can have the
3564 are preserved in the destination revlog. The argument can have the
3563 following values:
3565 following values:
3564
3566
3565 DELTAREUSEALWAYS
3567 DELTAREUSEALWAYS
3566 Deltas will always be reused (if possible), even if the destination
3568 Deltas will always be reused (if possible), even if the destination
3567 revlog would not select the same revisions for the delta. This is the
3569 revlog would not select the same revisions for the delta. This is the
3568 fastest mode of operation.
3570 fastest mode of operation.
3569 DELTAREUSESAMEREVS
3571 DELTAREUSESAMEREVS
3570 Deltas will be reused if the destination revlog would pick the same
3572 Deltas will be reused if the destination revlog would pick the same
3571 revisions for the delta. This mode strikes a balance between speed
3573 revisions for the delta. This mode strikes a balance between speed
3572 and optimization.
3574 and optimization.
3573 DELTAREUSENEVER
3575 DELTAREUSENEVER
3574 Deltas will never be reused. This is the slowest mode of execution.
3576 Deltas will never be reused. This is the slowest mode of execution.
3575 This mode can be used to recompute deltas (e.g. if the diff/delta
3577 This mode can be used to recompute deltas (e.g. if the diff/delta
3576 algorithm changes).
3578 algorithm changes).
3577 DELTAREUSEFULLADD
3579 DELTAREUSEFULLADD
3578 Revision will be re-added as if their were new content. This is
3580 Revision will be re-added as if their were new content. This is
3579 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
3581 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
3580 eg: large file detection and handling.
3582 eg: large file detection and handling.
3581
3583
3582 Delta computation can be slow, so the choice of delta reuse policy can
3584 Delta computation can be slow, so the choice of delta reuse policy can
3583 significantly affect run time.
3585 significantly affect run time.
3584
3586
3585 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3587 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3586 two extremes. Deltas will be reused if they are appropriate. But if the
3588 two extremes. Deltas will be reused if they are appropriate. But if the
3587 delta could choose a better revision, it will do so. This means if you
3589 delta could choose a better revision, it will do so. This means if you
3588 are converting a non-generaldelta revlog to a generaldelta revlog,
3590 are converting a non-generaldelta revlog to a generaldelta revlog,
3589 deltas will be recomputed if the delta's parent isn't a parent of the
3591 deltas will be recomputed if the delta's parent isn't a parent of the
3590 revision.
3592 revision.
3591
3593
3592 In addition to the delta policy, the ``forcedeltabothparents``
3594 In addition to the delta policy, the ``forcedeltabothparents``
3593 argument controls whether to force compute deltas against both parents
3595 argument controls whether to force compute deltas against both parents
3594 for merges. By default, the current default is used.
3596 for merges. By default, the current default is used.
3595
3597
3596 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3598 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3597 `sidedata_helpers`.
3599 `sidedata_helpers`.
3598 """
3600 """
3599 if deltareuse not in self.DELTAREUSEALL:
3601 if deltareuse not in self.DELTAREUSEALL:
3600 raise ValueError(
3602 raise ValueError(
3601 _(b'value for deltareuse invalid: %s') % deltareuse
3603 _(b'value for deltareuse invalid: %s') % deltareuse
3602 )
3604 )
3603
3605
3604 if len(destrevlog):
3606 if len(destrevlog):
3605 raise ValueError(_(b'destination revlog is not empty'))
3607 raise ValueError(_(b'destination revlog is not empty'))
3606
3608
3607 if getattr(self, 'filteredrevs', None):
3609 if getattr(self, 'filteredrevs', None):
3608 raise ValueError(_(b'source revlog has filtered revisions'))
3610 raise ValueError(_(b'source revlog has filtered revisions'))
3609 if getattr(destrevlog, 'filteredrevs', None):
3611 if getattr(destrevlog, 'filteredrevs', None):
3610 raise ValueError(_(b'destination revlog has filtered revisions'))
3612 raise ValueError(_(b'destination revlog has filtered revisions'))
3611
3613
3612 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3614 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3613 # if possible.
3615 # if possible.
3614 old_delta_config = destrevlog.delta_config
3616 old_delta_config = destrevlog.delta_config
3615 destrevlog.delta_config = destrevlog.delta_config.copy()
3617 destrevlog.delta_config = destrevlog.delta_config.copy()
3616
3618
3617 try:
3619 try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog.delta_config.lazy_delta_base = True
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = False

            delta_both_parents = (
                forcedeltabothparents or old_delta_config.delta_both_parents
            )
            destrevlog.delta_config.delta_both_parents = delta_both_parents

            with self.reading(), destrevlog._writing(tr):
                self._clone(
                    tr,
                    destrevlog,
                    addrevisioncb,
                    deltareuse,
                    forcedeltabothparents,
                    sidedata_helpers,
                )

        finally:
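            # Restore the destination's original delta configuration even if
            # the clone failed part-way.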
            destrevlog.delta_config = old_delta_config

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
3655 """perform the core duty of `revlog.clone` after parameter processing"""
3657 """perform the core duty of `revlog.clone` after parameter processing"""
3656 write_debug = None
3658 write_debug = None
3657 if self.delta_config.debug_delta:
3659 if self.delta_config.debug_delta:
3658 write_debug = tr._report
3660 write_debug = tr._report
3659 deltacomputer = deltautil.deltacomputer(
3661 deltacomputer = deltautil.deltacomputer(
3660 destrevlog,
3662 destrevlog,
3661 write_debug=write_debug,
3663 write_debug=write_debug,
3662 )
3664 )
3663 index = self.index
3665 index = self.index
3664 for rev in self:
3666 for rev in self:
3665 entry = index[rev]
3667 entry = index[rev]
3666
3668
3667 # Some classes override linkrev to take filtered revs into
3669 # Some classes override linkrev to take filtered revs into
3668 # account. Use raw entry from index.
3670 # account. Use raw entry from index.
3669 flags = entry[0] & 0xFFFF
3671 flags = entry[0] & 0xFFFF
3670 linkrev = entry[4]
3672 linkrev = entry[4]
3671 p1 = index[entry[5]][7]
3673 p1 = index[entry[5]][7]
3672 p2 = index[entry[6]][7]
3674 p2 = index[entry[6]][7]
3673 node = entry[7]
3675 node = entry[7]
3674
3676
3675 # (Possibly) reuse the delta from the revlog if allowed and
3677 # (Possibly) reuse the delta from the revlog if allowed and
3676 # the revlog chunk is a delta.
3678 # the revlog chunk is a delta.
3677 cachedelta = None
3679 cachedelta = None
3678 rawtext = None
3680 rawtext = None
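            # With DELTAREUSEFULLADD, each revision is re-added through the
            # public addrevision() path so the destination computes fresh
            # deltas from the full text.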
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
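                # Otherwise reuse the stored delta chunk when the destination
                # configuration allows it; fall back to the full raw text when
                # no cached delta is available.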
                if destrevlog.delta_config.lazy_delta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._inner._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    try:
                        rawtext = self._revisiondata(rev)
                    except error.CensoredNodeError as censored:
                        assert flags & REVIDX_ISCENSORED
                        rawtext = censored.tombstone
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
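        # The censoring strategy depends on the on-disk format: revlog-v0
        # cannot be censored, while v1 and v2 each have a dedicated rewrite
        # path.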
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()
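        # Nodes added to 'skipread' failed (or skipped) content validation
        # here, so later verification stages can avoid re-reading them.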

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()             | 0      | 0      | 0     | not 0
            #  renamed()           | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n'| False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text" mentioned below
            # is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common  | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1      | L1     | L1    | L1
            # size()       | L1      | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2      | L2     | L2    | L2
            # len(text)    | L2      | L2     | L2    | L3
            # len(read())  | L2      | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
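                # Narrow 'skipflags' to the flags actually present on this
                # revision, so validation is only relaxed where requested.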
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
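        # Each item is computed only when requested, since some of them
        # (e.g. trackedsize) require walking every revision.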
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.feature_config.has_side_data:
            return
        # revlog formats with sidedata support do not support inline data
        assert not self._inline
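        # 'helpers' is the triple from get_sidedata_helpers; index 1 holds
        # the generators and index 2 the removers.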
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._inner._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )
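
                # Prefer the plain representation; switch to a compressed one
                # only when compression actually shrinks the payload.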
                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.feature_config.has_side_data:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self._inner.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)