revlog: move `sidedata` in the inner object...

Author:    marmoute
Changeset: r51991:49d75cc1 (default)
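This change continues the `_InnerRevlog` extraction: reading sidedata now
happens on the inner object, next to the other low-level accessors
(`raw_text`, `_chunk`, ...), so that the logic can later be delegated to
Rust. Below is a minimal sketch of the resulting call path, assuming the
outer revlog keeps forwarding through its `_inner` attribute; `rl` and
`some_rev` are illustrative names, not part of the change:

    # Sketch only: reaching the relocated method through the inner object.
    # `rl` is an open revlog, `some_rev` a valid revision number in it.
    sidedata_end = None
    if rl._docket is not None:
        # revlog-v2 / changelog-v2 record the committed end of the
        # sidedata file in their docket; older formats store no sidedata.
        sidedata_end = rl._docket.sidedata_end
    with rl._inner.reading():  # keeps data and sidedata files open
        sidedata = rl._inner.sidedata(some_rev, sidedata_end)
    # `sidedata` maps sidedata keys to raw bytes, or is {} when the
    # revision carries no sidedata.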
@@ -1,3985 +1,3992 @@
 # revlog.py - storage back-end for mercurial
 # coding: utf8
 #
 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 """Storage back-end for Mercurial.
 
 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches.
 """
 
 
 import binascii
 import collections
 import contextlib
 import io
 import os
 import struct
 import weakref
 import zlib
 
 # import stuff from node for others to import from revlog
 from .node import (
     bin,
     hex,
     nullrev,
     sha1nodeconstants,
     short,
     wdirrev,
 )
 from .i18n import _
 from .revlogutils.constants import (
     ALL_KINDS,
     CHANGELOGV2,
     COMP_MODE_DEFAULT,
     COMP_MODE_INLINE,
     COMP_MODE_PLAIN,
     DELTA_BASE_REUSE_NO,
     DELTA_BASE_REUSE_TRY,
     ENTRY_RANK,
     FEATURES_BY_VERSION,
     FLAG_GENERALDELTA,
     FLAG_INLINE_DATA,
     INDEX_HEADER,
     KIND_CHANGELOG,
     KIND_FILELOG,
     RANK_UNKNOWN,
     REVLOGV0,
     REVLOGV1,
     REVLOGV1_FLAGS,
     REVLOGV2,
     REVLOGV2_FLAGS,
     REVLOG_DEFAULT_FLAGS,
     REVLOG_DEFAULT_FORMAT,
     REVLOG_DEFAULT_VERSION,
     SUPPORTED_FLAGS,
 )
 from .revlogutils.flagutil import (
     REVIDX_DEFAULT_FLAGS,
     REVIDX_ELLIPSIS,
     REVIDX_EXTSTORED,
     REVIDX_FLAGS_ORDER,
     REVIDX_HASCOPIESINFO,
     REVIDX_ISCENSORED,
     REVIDX_RAWTEXT_CHANGING_FLAGS,
 )
 from .thirdparty import attr
 from . import (
     ancestor,
     dagop,
     error,
     mdiff,
     policy,
     pycompat,
     revlogutils,
     templatefilters,
     util,
 )
 from .interfaces import (
     repository,
     util as interfaceutil,
 )
 from .revlogutils import (
     deltas as deltautil,
     docket as docketutil,
     flagutil,
     nodemap as nodemaputil,
     randomaccessfile,
     revlogv0,
     rewrite,
     sidedata as sidedatautil,
 )
 from .utils import (
     storageutil,
     stringutil,
 )
 
 # blanket usage of all the names to prevent pyflakes warnings
 # We need these names available in the module for extensions.
 
 REVLOGV0
 REVLOGV1
 REVLOGV2
 CHANGELOGV2
 FLAG_INLINE_DATA
 FLAG_GENERALDELTA
 REVLOG_DEFAULT_FLAGS
 REVLOG_DEFAULT_FORMAT
 REVLOG_DEFAULT_VERSION
 REVLOGV1_FLAGS
 REVLOGV2_FLAGS
 REVIDX_ISCENSORED
 REVIDX_ELLIPSIS
 REVIDX_HASCOPIESINFO
 REVIDX_EXTSTORED
 REVIDX_DEFAULT_FLAGS
 REVIDX_FLAGS_ORDER
 REVIDX_RAWTEXT_CHANGING_FLAGS
 
 parsers = policy.importmod('parsers')
 rustancestor = policy.importrust('ancestor')
 rustdagop = policy.importrust('dagop')
 rustrevlog = policy.importrust('revlog')
 
 # Aliased for performance.
 _zlibdecompress = zlib.decompress
 
 # max size of inline data embedded into a revlog
 _maxinline = 131072
 
 # Flag processors for REVIDX_ELLIPSIS.
 def ellipsisreadprocessor(rl, text):
     return text, False
 
 
 def ellipsiswriteprocessor(rl, text):
     return text, False
 
 
 def ellipsisrawprocessor(rl, text):
     return False
 
 
 ellipsisprocessor = (
     ellipsisreadprocessor,
     ellipsiswriteprocessor,
     ellipsisrawprocessor,
 )
 
 
 def _verify_revision(rl, skipflags, state, node):
     """Verify the integrity of the given revlog ``node`` while providing a hook
     point for extensions to influence the operation."""
     if skipflags:
         state[b'skipread'].add(node)
     else:
         # Side-effect: read content and verify hash.
         rl.revision(node)
 
 
 # True if a fast implementation for persistent-nodemap is available
 #
 # We also consider we have a "fast" implementation in "pure" python because
 # people using pure don't really have performance considerations (and a
 # wheelbarrow of other slowness sources)
 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
     parsers, 'BaseIndexObject'
 )
 
 
 @interfaceutil.implementer(repository.irevisiondelta)
 @attr.s(slots=True)
 class revlogrevisiondelta:
     node = attr.ib()
     p1node = attr.ib()
     p2node = attr.ib()
     basenode = attr.ib()
     flags = attr.ib()
     baserevisionsize = attr.ib()
     revision = attr.ib()
     delta = attr.ib()
     sidedata = attr.ib()
     protocol_flags = attr.ib()
     linknode = attr.ib(default=None)
 
 
 @interfaceutil.implementer(repository.iverifyproblem)
 @attr.s(frozen=True)
 class revlogproblem:
     warning = attr.ib(default=None)
     error = attr.ib(default=None)
     node = attr.ib(default=None)
 
 
 def parse_index_v1(data, inline):
     # call the C implementation to parse the index data
     index, cache = parsers.parse_index2(data, inline)
     return index, cache
 
 
 def parse_index_v2(data, inline):
     # call the C implementation to parse the index data
     index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
     return index, cache
 
 
 def parse_index_cl_v2(data, inline):
     # call the C implementation to parse the index data
     index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
     return index, cache
 
 
 if hasattr(parsers, 'parse_index_devel_nodemap'):
 
     def parse_index_v1_nodemap(data, inline):
         index, cache = parsers.parse_index_devel_nodemap(data, inline)
         return index, cache
 
 
 else:
     parse_index_v1_nodemap = None
 
 
 def parse_index_v1_mixed(data, inline):
     index, cache = parse_index_v1(data, inline)
     return rustrevlog.MixedIndex(index), cache
 
 
 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
 # signed integer)
 _maxentrysize = 0x7FFFFFFF
 
 FILE_TOO_SHORT_MSG = _(
     b'cannot read from revlog %s;'
     b' expected %d bytes from offset %d, data size is %d'
 )
 
 hexdigits = b'0123456789abcdefABCDEF'
 
 
 class _Config:
     def copy(self):
         return self.__class__(**self.__dict__)
 
 
 @attr.s()
 class FeatureConfig(_Config):
     """Hold configuration values about the available revlog features"""
 
     # the default compression engine
     compression_engine = attr.ib(default=b'zlib')
     # compression engines options
     compression_engine_options = attr.ib(default=attr.Factory(dict))
 
     # can we use censor on this revlog
     censorable = attr.ib(default=False)
     # does this revlog use the "side data" feature
     has_side_data = attr.ib(default=False)
     # might remove rank configuration once the computation has no impact
     compute_rank = attr.ib(default=False)
     # parent order is supposed to be semantically irrelevant, so we
     # normally resort parents to ensure that the first parent is non-null,
     # if there is a non-null parent at all.
     # filelog abuses the parent order as a flag to mark some instances of
     # meta-encoded files, so allow it to disable this behavior.
     canonical_parent_order = attr.ib(default=False)
     # can ellipsis commit be used
     enable_ellipsis = attr.ib(default=False)
 
     def copy(self):
         new = super().copy()
         new.compression_engine_options = self.compression_engine_options.copy()
         return new
 
 
 @attr.s()
 class DataConfig(_Config):
     """Hold configuration values about how the revlog data are read"""
 
     # should we try to open the "pending" version of the revlog
     try_pending = attr.ib(default=False)
     # should we try to open the "split" version of the revlog
     try_split = attr.ib(default=False)
     # When True, indexfile should be opened with checkambig=True at writing,
     # to avoid file stat ambiguity.
     check_ambig = attr.ib(default=False)
 
     # If true, use mmap instead of reading to deal with a large index
     mmap_large_index = attr.ib(default=False)
     # how much data is considered large
     mmap_index_threshold = attr.ib(default=None)
     # How much data to read and cache into the raw revlog data cache.
     chunk_cache_size = attr.ib(default=65536)
 
     # Allow sparse reading of the revlog data
     with_sparse_read = attr.ib(default=False)
     # minimal density of a sparse read chunk
     sr_density_threshold = attr.ib(default=0.50)
     # minimal size of data we skip when performing sparse read
     sr_min_gap_size = attr.ib(default=262144)
 
     # are deltas encoded against arbitrary bases.
     generaldelta = attr.ib(default=False)
 
 
 @attr.s()
 class DeltaConfig(_Config):
     """Hold configuration values about how new deltas are computed
 
     Some attributes are duplicated from DataConfig to help keep each
     object self-contained.
     """
 
     # can deltas be encoded against arbitrary bases.
     general_delta = attr.ib(default=False)
     # Allow sparse writing of the revlog data
     sparse_revlog = attr.ib(default=False)
     # maximum length of a delta chain
     max_chain_len = attr.ib(default=None)
     # Maximum distance between delta chain base start and end
     max_deltachain_span = attr.ib(default=-1)
     # If `upper_bound_comp` is not None, this is the expected maximal gain from
     # compression for the data content.
     upper_bound_comp = attr.ib(default=None)
     # Should we try a delta against both parents
     delta_both_parents = attr.ib(default=True)
     # Test delta base candidate groups by chunks of this maximal size.
     candidate_group_chunk_size = attr.ib(default=0)
     # Should we display debug information about delta computation
     debug_delta = attr.ib(default=False)
     # trust incoming deltas by default
     lazy_delta = attr.ib(default=True)
     # trust the base of incoming deltas by default
     lazy_delta_base = attr.ib(default=False)
 
 
 class _InnerRevlog:
     """An inner layer of the revlog object
 
     That layer exists to be able to delegate some operations to Rust; its
     boundaries are arbitrary and based on what we can delegate to Rust.
     """
 
     def __init__(
         self,
         opener,
         index,
         index_file,
         data_file,
         sidedata_file,
         inline,
         data_config,
         delta_config,
         feature_config,
         chunk_cache,
         default_compression_header,
     ):
         self.opener = opener
         self.index = index
 
         self.__index_file = index_file
         self.data_file = data_file
         self.sidedata_file = sidedata_file
         self.inline = inline
         self.data_config = data_config
         self.delta_config = delta_config
         self.feature_config = feature_config
 
         self._default_compression_header = default_compression_header
 
         # index
 
         # 3-tuple of file handles being used for active writing.
         self._writinghandles = None
 
         self._segmentfile = randomaccessfile.randomaccessfile(
             self.opener,
             (self.index_file if self.inline else self.data_file),
             self.data_config.chunk_cache_size,
             chunk_cache,
         )
         self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
             self.opener,
             self.sidedata_file,
             self.data_config.chunk_cache_size,
         )
 
         # revlog header -> revlog compressor
         self._decompressors = {}
         # 3-tuple of (node, rev, text) for a raw revision.
         self._revisioncache = None
 
     @property
     def index_file(self):
         return self.__index_file
 
     @index_file.setter
     def index_file(self, new_index_file):
         self.__index_file = new_index_file
         if self.inline:
             self._segmentfile.filename = new_index_file
 
     def __len__(self):
         return len(self.index)
 
     # Derived from index values.
 
     def start(self, rev):
         """the offset of the data chunk for this revision"""
         return int(self.index[rev][0] >> 16)
 
     def length(self, rev):
         """the length of the data chunk for this revision"""
         return self.index[rev][1]
 
     def end(self, rev):
         """the end of the data chunk for this revision"""
         return self.start(rev) + self.length(rev)
 
     def deltaparent(self, rev):
         """return the delta parent of the given revision"""
         base = self.index[rev][3]
         if base == rev:
             return nullrev
         elif self.delta_config.general_delta:
             return base
         else:
             return rev - 1
 
     def issnapshot(self, rev):
         """tells whether rev is a snapshot"""
         if not self.delta_config.sparse_revlog:
             return self.deltaparent(rev) == nullrev
         elif hasattr(self.index, 'issnapshot'):
             # directly assign the method to cache the testing and access
             self.issnapshot = self.index.issnapshot
             return self.issnapshot(rev)
         if rev == nullrev:
             return True
         entry = self.index[rev]
         base = entry[3]
         if base == rev:
             return True
         if base == nullrev:
             return True
         p1 = entry[5]
         while self.length(p1) == 0:
             b = self.deltaparent(p1)
             if b == p1:
                 break
             p1 = b
         p2 = entry[6]
         while self.length(p2) == 0:
             b = self.deltaparent(p2)
             if b == p2:
                 break
             p2 = b
         if base == p1 or base == p2:
             return False
         return self.issnapshot(base)
 
     def _deltachain(self, rev, stoprev=None):
         """Obtain the delta chain for a revision.
 
         ``stoprev`` specifies a revision to stop at. If not specified, we
         stop at the base of the chain.
 
         Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
         revs in ascending order and ``stopped`` is a bool indicating whether
         ``stoprev`` was hit.
         """
         generaldelta = self.delta_config.general_delta
         # Try C implementation.
         try:
             return self.index.deltachain(rev, stoprev, generaldelta)
         except AttributeError:
             pass
 
         chain = []
 
         # Alias to prevent attribute lookup in tight loop.
         index = self.index
 
         iterrev = rev
         e = index[iterrev]
         while iterrev != e[3] and iterrev != stoprev:
             chain.append(iterrev)
             if generaldelta:
                 iterrev = e[3]
             else:
                 iterrev -= 1
             e = index[iterrev]
 
         if iterrev == stoprev:
             stopped = True
         else:
             chain.append(iterrev)
             stopped = False
 
         chain.reverse()
         return chain, stopped
 
     @util.propertycache
     def _compressor(self):
         engine = util.compengines[self.feature_config.compression_engine]
         return engine.revlogcompressor(
             self.feature_config.compression_engine_options
         )
 
     @util.propertycache
     def _decompressor(self):
         """the default decompressor"""
         if self._default_compression_header is None:
             return None
         t = self._default_compression_header
         c = self._get_decompressor(t)
         return c.decompress
 
     def _get_decompressor(self, t):
         try:
             compressor = self._decompressors[t]
         except KeyError:
             try:
                 engine = util.compengines.forrevlogheader(t)
                 compressor = engine.revlogcompressor(
                     self.feature_config.compression_engine_options
                 )
                 self._decompressors[t] = compressor
             except KeyError:
                 raise error.RevlogError(
                     _(b'unknown compression type %s') % binascii.hexlify(t)
                 )
         return compressor
 
     def compress(self, data):
         """Generate a possibly-compressed representation of data."""
         if not data:
             return b'', data
 
         compressed = self._compressor.compress(data)
 
         if compressed:
             # The revlog compressor added the header in the returned data.
             return b'', compressed
 
         if data[0:1] == b'\0':
             return b'', data
         return b'u', data
 
     def decompress(self, data):
         """Decompress a revlog chunk.
 
         The chunk is expected to begin with a header identifying the
         format type so it can be routed to an appropriate decompressor.
         """
         if not data:
             return data
 
         # Revlogs are read much more frequently than they are written and many
         # chunks only take microseconds to decompress, so performance is
         # important here.
         #
         # We can make a few assumptions about revlogs:
         #
         # 1) the majority of chunks will be compressed (as opposed to inline
         #    raw data).
         # 2) decompressing *any* data will likely be at least 10x slower than
         #    returning raw inline data.
         # 3) we want to prioritize common and officially supported compression
         #    engines
         #
         # It follows that we want to optimize for the "decompress compressed
         # data encoded with common and officially supported compression
         # engines" case over "raw data" and "data encoded by less common or
         # non-official compression engines." That is why we have the inline
         # lookup first followed by the compengines lookup.
         #
         # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
         # compressed chunks. And this matters for changelog and manifest reads.
         t = data[0:1]
 
         if t == b'x':
             try:
                 return _zlibdecompress(data)
             except zlib.error as e:
                 raise error.RevlogError(
                     _(b'revlog decompress error: %s')
                     % stringutil.forcebytestr(e)
                 )
         # '\0' is more common than 'u' so it goes first.
         elif t == b'\0':
             return data
         elif t == b'u':
             return util.buffer(data, 1)
 
         compressor = self._get_decompressor(t)
 
         return compressor.decompress(data)
 
     @contextlib.contextmanager
     def reading(self):
         """Context manager that keeps data and sidedata files open for reading"""
         if len(self.index) == 0:
             yield  # nothing to be read
         else:
             with self._segmentfile.reading():
                 with self._segmentfile_sidedata.reading():
                     yield
 
     @property
     def is_writing(self):
         """True if a writing context is open"""
         return self._writinghandles is not None
 
     @contextlib.contextmanager
     def writing(self, transaction, data_end=None, sidedata_end=None):
         """Open the revlog files for writing
 
         Adding content to a revlog should be done within such a context.
         """
         if self.is_writing:
             yield
         else:
             ifh = dfh = sdfh = None
             try:
                 r = len(self.index)
                 # opening the data file.
                 dsize = 0
                 if r:
                     dsize = self.end(r - 1)
                 dfh = None
                 if not self.inline:
                     try:
                         dfh = self.opener(self.data_file, mode=b"r+")
                         if data_end is None:
                             dfh.seek(0, os.SEEK_END)
                         else:
                             dfh.seek(data_end, os.SEEK_SET)
                     except FileNotFoundError:
                         dfh = self.opener(self.data_file, mode=b"w+")
                     transaction.add(self.data_file, dsize)
                 if self.sidedata_file is not None:
                     assert sidedata_end is not None
                     # revlog-v2 does not inline, help Pytype
                     assert dfh is not None
                     try:
                         sdfh = self.opener(self.sidedata_file, mode=b"r+")
                         dfh.seek(sidedata_end, os.SEEK_SET)
                     except FileNotFoundError:
                         sdfh = self.opener(self.sidedata_file, mode=b"w+")
                     transaction.add(self.sidedata_file, sidedata_end)
 
                 # opening the index file.
                 isize = r * self.index.entry_size
                 ifh = self.__index_write_fp()
                 if self.inline:
                     transaction.add(self.index_file, dsize + isize)
                 else:
                     transaction.add(self.index_file, isize)
                 # exposing all file handles for writing.
                 self._writinghandles = (ifh, dfh, sdfh)
                 self._segmentfile.writing_handle = ifh if self.inline else dfh
                 self._segmentfile_sidedata.writing_handle = sdfh
                 yield
             finally:
                 self._writinghandles = None
                 self._segmentfile.writing_handle = None
                 self._segmentfile_sidedata.writing_handle = None
                 if dfh is not None:
                     dfh.close()
                 if sdfh is not None:
                     sdfh.close()
                 # closing the index file last to avoid exposing references to
                 # potentially unflushed data content.
                 if ifh is not None:
                     ifh.close()
 
     def __index_write_fp(self, index_end=None):
         """internal method to open the index file for writing
 
         You should not use this directly; use `_writing` instead.
         """
         try:
             f = self.opener(
                 self.index_file,
                 mode=b"r+",
                 checkambig=self.data_config.check_ambig,
             )
             if index_end is None:
                 f.seek(0, os.SEEK_END)
             else:
                 f.seek(index_end, os.SEEK_SET)
             return f
         except FileNotFoundError:
             return self.opener(
                 self.index_file,
                 mode=b"w+",
                 checkambig=self.data_config.check_ambig,
             )
 
     def __index_new_fp(self):
         """internal method to create a new index file for writing
 
         You should not use this unless you are upgrading from an inline revlog.
         """
         return self.opener(
             self.index_file,
             mode=b"w",
             checkambig=self.data_config.check_ambig,
             atomictemp=True,
         )
 
     def split_inline(self, tr, header, new_index_file_path=None):
         """split the data of an inline revlog into an index and a data file"""
         existing_handles = False
         if self._writinghandles is not None:
             existing_handles = True
             fp = self._writinghandles[0]
             fp.flush()
             fp.close()
             # We can't use the cached file handle after close(). So prevent
             # its usage.
             self._writinghandles = None
             self._segmentfile.writing_handle = None
             # No need to deal with the sidedata writing handle as it is only
             # relevant with revlog-v2, which is never inline; this code is
             # not reached for it.
 
         new_dfh = self.opener(self.data_file, mode=b"w+")
         new_dfh.truncate(0)  # drop any potentially existing data
         try:
             with self.reading():
                 for r in range(len(self.index)):
                     new_dfh.write(self.get_segment_for_revs(r, r)[1])
                 new_dfh.flush()
 
             if new_index_file_path is not None:
                 self.index_file = new_index_file_path
             with self.__index_new_fp() as fp:
                 self.inline = False
                 for i in range(len(self.index)):
                     e = self.index.entry_binary(i)
                     if i == 0:
                         packed_header = self.index.pack_header(header)
                         e = packed_header + e
                     fp.write(e)
 
                 # If we don't use side-write, the temp file replaces the
                 # real index when we exit the context manager
 
             self._segmentfile = randomaccessfile.randomaccessfile(
                 self.opener,
                 self.data_file,
                 self.data_config.chunk_cache_size,
             )
 
             if existing_handles:
                 # switched from inline to conventional; reopen the index
                 ifh = self.__index_write_fp()
                 self._writinghandles = (ifh, new_dfh, None)
                 self._segmentfile.writing_handle = new_dfh
                 new_dfh = None
                 # No need to deal with the sidedata writing handle as it is
                 # only relevant with revlog-v2, which is never inline; this
                 # code is not reached for it.
         finally:
             if new_dfh is not None:
                 new_dfh.close()
         return self.index_file
 
     def get_segment_for_revs(self, startrev, endrev):
         """Obtain a segment of raw data corresponding to a range of revisions.
 
         Accepts the start and end revisions and an optional already-open
         file handle to be used for reading. If the file handle is read, its
         seek position will not be preserved.
 
         Requests for data may be satisfied by a cache.
 
         Returns a 2-tuple of (offset, data) for the requested range of
         revisions. Offset is the integer offset from the beginning of the
         revlog and data is a str or buffer of the raw byte data.
 
         Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
         to determine where each revision's data begins and ends.
 
         API: we should consider making this a private part of the InnerRevlog
         at some point.
         """
         # Inlined self.start(startrev) & self.end(endrev) for perf reasons
         # (functions are expensive).
         index = self.index
         istart = index[startrev]
         start = int(istart[0] >> 16)
         if startrev == endrev:
             end = start + istart[1]
         else:
             iend = index[endrev]
             end = int(iend[0] >> 16) + iend[1]
 
         if self.inline:
             start += (startrev + 1) * self.index.entry_size
             end += (endrev + 1) * self.index.entry_size
         length = end - start
 
         return start, self._segmentfile.read_chunk(start, length)
 
     def _chunk(self, rev):
         """Obtain a single decompressed chunk for a revision.
 
         Accepts an integer revision and an optional already-open file handle
         to be used for reading. If used, the seek position of the file will not
         be preserved.
 
         Returns a str holding uncompressed data for the requested revision.
         """
         compression_mode = self.index[rev][10]
         data = self.get_segment_for_revs(rev, rev)[1]
         if compression_mode == COMP_MODE_PLAIN:
             return data
         elif compression_mode == COMP_MODE_DEFAULT:
             return self._decompressor(data)
         elif compression_mode == COMP_MODE_INLINE:
             return self.decompress(data)
         else:
             msg = b'unknown compression mode %d'
             msg %= compression_mode
             raise error.RevlogError(msg)
 
     def _chunks(self, revs, targetsize=None):
         """Obtain decompressed chunks for the specified revisions.
 
         Accepts an iterable of numeric revisions that are assumed to be in
         ascending order. Also accepts an optional already-open file handle
         to be used for reading. If used, the seek position of the file will
         not be preserved.
 
         This function is similar to calling ``self._chunk()`` multiple times,
         but is faster.
 
         Returns a list with decompressed data for each requested revision.
         """
         if not revs:
             return []
         start = self.start
         length = self.length
         inline = self.inline
         iosize = self.index.entry_size
         buffer = util.buffer
 
         l = []
         ladd = l.append
 
         if not self.data_config.with_sparse_read:
             slicedchunks = (revs,)
         else:
             slicedchunks = deltautil.slicechunk(
                 self,
                 revs,
                 targetsize=targetsize,
             )
 
         for revschunk in slicedchunks:
             firstrev = revschunk[0]
             # Skip trailing revisions with empty diff
             for lastrev in revschunk[::-1]:
                 if length(lastrev) != 0:
                     break
 
             try:
                 offset, data = self.get_segment_for_revs(firstrev, lastrev)
             except OverflowError:
                 # issue4215 - we can't cache a run of chunks greater than
                 # 2G on Windows
                 return [self._chunk(rev) for rev in revschunk]
 
             decomp = self.decompress
             # self._decompressor might be None, but will not be used in that case
             def_decomp = self._decompressor
             for rev in revschunk:
                 chunkstart = start(rev)
                 if inline:
                     chunkstart += (rev + 1) * iosize
                 chunklength = length(rev)
                 comp_mode = self.index[rev][10]
                 c = buffer(data, chunkstart - offset, chunklength)
                 if comp_mode == COMP_MODE_PLAIN:
                     ladd(c)
                 elif comp_mode == COMP_MODE_INLINE:
                     ladd(decomp(c))
                 elif comp_mode == COMP_MODE_DEFAULT:
                     ladd(def_decomp(c))
                 else:
                     msg = b'unknown compression mode %d'
                     msg %= comp_mode
                     raise error.RevlogError(msg)
 
         return l
 
     def raw_text(self, node, rev):
         """return the possibly unvalidated rawtext for a revision
 
         returns (rev, rawtext, validated)
         """
 
         # revision in the cache (could be useful to apply delta)
         cachedrev = None
         # An intermediate text to apply deltas to
         basetext = None
 
         # Check if we have the entry in cache
         # The cache entry looks like (node, rev, rawtext)
         if self._revisioncache:
             cachedrev = self._revisioncache[1]
 
         chain, stopped = self._deltachain(rev, stoprev=cachedrev)
         if stopped:
             basetext = self._revisioncache[2]
 
         # drop cache to save memory, the caller is expected to
         # update self._inner._revisioncache after validating the text
         self._revisioncache = None
 
         targetsize = None
         rawsize = self.index[rev][2]
         if 0 <= rawsize:
             targetsize = 4 * rawsize
 
         bins = self._chunks(chain, targetsize=targetsize)
         if basetext is None:
             basetext = bytes(bins[0])
             bins = bins[1:]
 
         rawtext = mdiff.patches(basetext, bins)
         del basetext  # let us have a chance to free memory early
         return (rev, rawtext, False)
 
+    def sidedata(self, rev, sidedata_end):
+        """Return the sidedata for a given revision number."""
+        index_entry = self.index[rev]
+        sidedata_offset = index_entry[8]
+        sidedata_size = index_entry[9]
+
+        if self.inline:
+            sidedata_offset += self.index.entry_size * (1 + rev)
+        if sidedata_size == 0:
+            return {}
+
+        if sidedata_end < sidedata_offset + sidedata_size:
+            filename = self.sidedata_file
+            end = sidedata_end
+            offset = sidedata_offset
+            length = sidedata_size
+            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
+            raise error.RevlogError(m)
+
+        comp_segment = self._segmentfile_sidedata.read_chunk(
+            sidedata_offset, sidedata_size
+        )
+
+        comp = self.index[rev][11]
+        if comp == COMP_MODE_PLAIN:
+            segment = comp_segment
+        elif comp == COMP_MODE_DEFAULT:
+            segment = self._decompressor(comp_segment)
+        elif comp == COMP_MODE_INLINE:
+            segment = self.decompress(comp_segment)
+        else:
+            msg = b'unknown compression mode %d'
+            msg %= comp
+            raise error.RevlogError(msg)
+
+        sidedata = sidedatautil.deserialize_sidedata(segment)
+        return sidedata
+
 
942 class revlog:
980 class revlog:
943 """
981 """
944 the underlying revision storage object
982 the underlying revision storage object
945
983
946 A revlog consists of two parts, an index and the revision data.
984 A revlog consists of two parts, an index and the revision data.
947
985
948 The index is a file with a fixed record size containing
986 The index is a file with a fixed record size containing
949 information on each revision, including its nodeid (hash), the
987 information on each revision, including its nodeid (hash), the
950 nodeids of its parents, the position and offset of its data within
988 nodeids of its parents, the position and offset of its data within
951 the data file, and the revision it's based on. Finally, each entry
989 the data file, and the revision it's based on. Finally, each entry
952 contains a linkrev entry that can serve as a pointer to external
990 contains a linkrev entry that can serve as a pointer to external
953 data.
991 data.
954
992
955 The revision data itself is a linear collection of data chunks.
993 The revision data itself is a linear collection of data chunks.
956 Each chunk represents a revision and is usually represented as a
994 Each chunk represents a revision and is usually represented as a
957 delta against the previous chunk. To bound lookup time, runs of
995 delta against the previous chunk. To bound lookup time, runs of
958 deltas are limited to about 2 times the length of the original
996 deltas are limited to about 2 times the length of the original
959 version data. This makes retrieval of a version proportional to
997 version data. This makes retrieval of a version proportional to
960 its size, or O(1) relative to the number of revisions.
998 its size, or O(1) relative to the number of revisions.
961
999
962 Both pieces of the revlog are written to in an append-only
1000 Both pieces of the revlog are written to in an append-only
963 fashion, which means we never need to rewrite a file to insert or
1001 fashion, which means we never need to rewrite a file to insert or
964 remove data, and can use some simple techniques to avoid the need
1002 remove data, and can use some simple techniques to avoid the need
965 for locking while reading.
1003 for locking while reading.
966
1004
967 If checkambig, indexfile is opened with checkambig=True at
1005 If checkambig, indexfile is opened with checkambig=True at
968 writing, to avoid file stat ambiguity.
1006 writing, to avoid file stat ambiguity.
969
1007
970 If mmaplargeindex is True, and an mmapindexthreshold is set, the
1008 If mmaplargeindex is True, and an mmapindexthreshold is set, the
971 index will be mmapped rather than read if it is larger than the
1009 index will be mmapped rather than read if it is larger than the
972 configured threshold.
1010 configured threshold.
973
1011
974 If censorable is True, the revlog can have censored revisions.
1012 If censorable is True, the revlog can have censored revisions.
975
1013
976 If `upperboundcomp` is not None, this is the expected maximal gain from
1014 If `upperboundcomp` is not None, this is the expected maximal gain from
977 compression for the data content.
1015 compression for the data content.
978
1016
979 `concurrencychecker` is an optional function that receives 3 arguments: a
1017 `concurrencychecker` is an optional function that receives 3 arguments: a
980 file handle, a filename, and an expected position. It should check whether
1018 file handle, a filename, and an expected position. It should check whether
981 the current position in the file handle is valid, and log/warn/fail (by
1019 the current position in the file handle is valid, and log/warn/fail (by
982 raising).
1020 raising).
983
1021
984 See mercurial/revlogutils/contants.py for details about the content of an
1022 See mercurial/revlogutils/contants.py for details about the content of an
985 index entry.
1023 index entry.
986 """
1024 """

    _flagserrorclass = error.RevlogError

    @staticmethod
    def is_inline_index(header_bytes):
        """Determine if a revlog is inline from the initial bytes of the index"""
        header = INDEX_HEADER.unpack(header_bytes)[0]

        _format_flags = header & ~0xFFFF
        _format_version = header & 0xFFFF

        features = FEATURES_BY_VERSION[_format_version]
        return features[b'inline'](_format_flags)

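    # A sketch of the header layout probed by ``is_inline_index``, derived
    # from the masks above: the low 16 bits of the first four index bytes hold
    # the format version (e.g. REVLOGV1) and the high 16 bits hold feature
    # flags, so a header of ``REVLOGV1 | FLAG_INLINE_DATA`` denotes an inline
    # version-1 revlog.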
    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        try_split=False,
        canonical_parent_order=True,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code understand what the revlog
        is about without having to resort to heuristics and index filename
        analysis. Note that this must reliably be set by normal code, but
        test, debug, or performance measurement code might not set this to an
        accurate value.
        """

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self._try_split = try_split
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        if b'feature-config' in self.opener.options:
            self.feature_config = self.opener.options[b'feature-config'].copy()
        else:
            self.feature_config = FeatureConfig()
        self.feature_config.censorable = censorable
        self.feature_config.canonical_parent_order = canonical_parent_order
        if b'data-config' in self.opener.options:
            self.data_config = self.opener.options[b'data-config'].copy()
        else:
            self.data_config = DataConfig()
        self.data_config.check_ambig = checkambig
        self.data_config.mmap_large_index = mmaplargeindex
        if b'delta-config' in self.opener.options:
            self.delta_config = self.opener.options[b'delta-config'].copy()
        else:
            self.delta_config = DeltaConfig()
        self.delta_config.upper_bound_comp = upperboundcomp

        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)

        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}

        # other optional features

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)
        # prevent nesting of addgroup
        self._adding_group = None

        chunk_cache = self._loadindex()
        self._load_inner(chunk_cache)
        self._concurrencychecker = concurrencychecker

    @property
    def _generaldelta(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
        )
        return self.delta_config.general_delta

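    # The compatibility properties below all follow the same pattern: emit a
    # deprecation warning through ``util.nouideprecwarn`` and forward to the
    # matching attribute on ``feature_config``, ``data_config`` or
    # ``delta_config``.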
    @property
    def _checkambig(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
        )
        return self.data_config.check_ambig

    @property
    def _mmaplargeindex(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
        )
        return self.data_config.mmap_large_index

    @property
    def _censorable(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
        )
        return self.feature_config.censorable

    @property
    def _chunkcachesize(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
        )
        return self.data_config.chunk_cache_size

    @property
    def _maxchainlen(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
        )
        return self.delta_config.max_chain_len

    @property
    def _deltabothparents(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
        )
        return self.delta_config.delta_both_parents

    @property
    def _candidate_group_chunk_size(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.candidate_group_chunk_size",
            b"6.6",
            stacklevel=2,
        )
        return self.delta_config.candidate_group_chunk_size

    @property
    def _debug_delta(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
        )
        return self.delta_config.debug_delta

    @property
    def _compengine(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.compression_engine",
            b"6.6",
            stacklevel=2,
        )
        return self.feature_config.compression_engine

    @property
    def upperboundcomp(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.upper_bound_comp",
            b"6.6",
            stacklevel=2,
        )
        return self.delta_config.upper_bound_comp

    @property
    def _compengineopts(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.compression_engine_options",
            b"6.6",
            stacklevel=2,
        )
        return self.feature_config.compression_engine_options

    @property
    def _maxdeltachainspan(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
        )
        return self.delta_config.max_deltachain_span

    @property
    def _withsparseread(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
        )
        return self.data_config.with_sparse_read

    @property
    def _sparserevlog(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
        )
        return self.delta_config.sparse_revlog

    @property
    def hassidedata(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
        )
        return self.feature_config.has_side_data

    @property
    def _srdensitythreshold(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.sr_density_threshold",
            b"6.6",
            stacklevel=2,
        )
        return self.data_config.sr_density_threshold

    @property
    def _srmingapsize(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
        )
        return self.data_config.sr_min_gap_size

    @property
    def _compute_rank(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
        )
        return self.feature_config.compute_rank

    @property
    def canonical_parent_order(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.canonical_parent_order",
            b"6.6",
            stacklevel=2,
        )
        return self.feature_config.canonical_parent_order

    @property
    def _lazydelta(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
        )
        return self.delta_config.lazy_delta

    @property
    def _lazydeltabase(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
        )
        return self.delta_config.lazy_delta_base

    def _init_opts(self):
        """process options (from above/config) to set up the associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
            compute_rank = opts.get(b'changelogv2.compute-rank', True)
            self.feature_config.compute_rank = compute_rank
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        mmapindexthreshold = None
        if self.data_config.mmap_large_index:
            mmapindexthreshold = self.data_config.mmap_index_threshold
        if self.feature_config.enable_ellipsis:
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        chunk_cache_size = self.data_config.chunk_cache_size
        if chunk_cache_size <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % chunk_cache_size
            )
        elif chunk_cache_size & (chunk_cache_size - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % chunk_cache_size
            )
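        # ``n & (n - 1)`` clears the lowest set bit of ``n``, so the check
        # above is zero exactly when the cache size is a power of two;
        # e.g. 65536 passes while 65537 would be rejected.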
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''

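    # ``_loadindex`` below calls this as
    # ``self._get_data(entry_point, mmapindexthreshold)``: the file is read
    # outright when smaller than the threshold and mmapped otherwise, and the
    # optional ``size`` caps how many bytes are exposed.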
    def get_streams(self, max_linkrev, force_inline=False):
        """return a list of streams that represent this revlog

        This is used by stream-clone to do byte-for-byte copies of a repository.

        This streams data for all revisions that refer to a changelog revision up
        to `max_linkrev`.

        If `force_inline` is set, it enforces that the stream will represent an inline revlog.

        It returns a list of three-tuples:

            [
                (filename, bytes_stream, stream_size),
                …
            ]
        """
        n = len(self)
        index = self.index
        while n > 0:
            linkrev = index[n - 1][4]
            if linkrev < max_linkrev:
                break
            # note: this loop will rarely go through multiple iterations, since
            # it only traverses commits created during the current streaming
            # pull operation.
            #
            # If this becomes a problem, using a binary search should cap the
            # runtime of this.
            n = n - 1
        if n == 0:
            # no data to send
            return []
        index_size = n * index.entry_size
        data_size = self.end(n - 1)

        # XXX we might have been split (or stripped) since the object
        # initialization. We need to close this race too, e.g. by having a way
        # to pre-open the files we feed to the revlog and never closing them
        # before we are done streaming.

        if self._inline:

            def get_stream():
                with self.opener(self._indexfile, mode=b"r") as fp:
                    yield None
                    size = index_size + data_size
                    if size <= 65536:
                        yield fp.read(size)
                    else:
                        yield from util.filechunkiter(fp, limit=size)

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        elif force_inline:

            def get_stream():
                with self.reading():
                    yield None

                    for rev in range(n):
                        idx = self.index.entry_binary(rev)
                        if rev == 0 and self._docket is None:
                            # re-inject the inline flag
                            header = self._format_flags
                            header |= self._format_version
                            header |= FLAG_INLINE_DATA
                            header = self.index.pack_header(header)
                            idx = header + idx
                        yield idx
                        yield self._inner.get_segment_for_revs(rev, rev)[1]

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        else:

            def get_index_stream():
                with self.opener(self._indexfile, mode=b"r") as fp:
                    yield None
                    if index_size <= 65536:
                        yield fp.read(index_size)
                    else:
                        yield from util.filechunkiter(fp, limit=index_size)

            def get_data_stream():
                with self._datafp() as fp:
                    yield None
                    if data_size <= 65536:
                        yield fp.read(data_size)
                    else:
                        yield from util.filechunkiter(fp, limit=data_size)

            index_stream = get_index_stream()
            next(index_stream)
            data_stream = get_data_stream()
            next(data_stream)
            return [
                (self._datafile, data_stream, data_size),
                (self._indexfile, index_stream, index_size),
            ]

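    # To summarize ``get_streams``: the three branches yield, respectively,
    # the ``.i`` file verbatim (already inline), an inline stream re-packed
    # entry by entry (``force_inline``), or the ``.d`` and ``.i`` files as two
    # separate streams.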
    def _loadindex(self, docket=None):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        elif self._try_split and self.opener.exists(self._split_index_file):
            entry_point = self._split_index_file
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self.delta_config.general_delta = features[b'generaldelta'](
                self._format_flags
            )
            self.feature_config.has_side_data = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self.delta_config.general_delta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self.delta_config.general_delta:
            self.delta_config.sparse_revlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and hasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)

        return chunkcache

    def _load_inner(self, chunk_cache):
        if self._docket is None:
            default_compression_header = None
        else:
            default_compression_header = self._docket.default_compression_header

        self._inner = _InnerRevlog(
            opener=self.opener,
            index=self.index,
            index_file=self._indexfile,
            data_file=self._datafile,
            sidedata_file=self._sidedatafile,
            inline=self._inline,
            data_config=self.data_config,
            delta_config=self.delta_config,
            feature_config=self.feature_config,
            chunk_cache=chunk_cache,
            default_compression_header=default_compression_header,
        )

    def get_revlog(self):
        """simple function to mirror the API of other not-really-revlog objects"""
        return self

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public-facing "ID" of the revlog that we use in messages"""
        if self.revlog_kind == KIND_FILELOG:
            # Reference the file without the "data/" prefix, so it is familiar
            # to the user.
            return self.target[1]
        else:
            return self.radix

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def _candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        """update the on-disk cache

        If a transaction is passed, the update may be delayed to transaction
        commit."""
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        """Clear in-memory caches"""
        self._inner._revisioncache = None
        self._chainbasecache.clear()
        self._inner._segmentfile.clear_cache()
        self._inner._segmentfile_sidedata.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The Python code is responsible for validating the docket, so we
        # end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and hasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        """return the revision number associated with a <nodeid>"""
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
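    # ``start`` therefore shifts the 16 flag bits away (``>> 16``) while
    # ``flags`` masks the offset away (``& 0xFFFF``).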
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset (instead of previous-offset +
        # previous-size).
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.feature_config.has_side_data:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

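        # With canonical parent order, a null p1 is reported after the real
        # parent: when entry[5] is nullrev the pair is swapped so the non-null
        # parent comes first.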
1839 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1877 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1840 return entry[6], entry[5]
1878 return entry[6], entry[5]
1841 else:
1879 else:
1842 return entry[5], entry[6]
1880 return entry[5], entry[6]
1843
1881
1844 # fast parentrevs(rev) where rev isn't filtered
1882 # fast parentrevs(rev) where rev isn't filtered
1845 _uncheckedparentrevs = parentrevs
1883 _uncheckedparentrevs = parentrevs
1846
1884
1847 def node(self, rev):
1885 def node(self, rev):
1848 try:
1886 try:
1849 return self.index[rev][7]
1887 return self.index[rev][7]
1850 except IndexError:
1888 except IndexError:
1851 if rev == wdirrev:
1889 if rev == wdirrev:
1852 raise error.WdirUnsupported
1890 raise error.WdirUnsupported
1853 raise
1891 raise
1854
1892
1855 # Derived from index values.
1893 # Derived from index values.
1856
1894
1857 def end(self, rev):
1895 def end(self, rev):
1858 return self.start(rev) + self.length(rev)
1896 return self.start(rev) + self.length(rev)
1859
1897
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if self.feature_config.canonical_parent_order and d[5] == nullrev:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self.delta_config.general_delta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

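    # [editor's note] Illustrative sketch with hypothetical numbers, not part
    # of the original source: _chaininfo() returns a pair
    # ``(chain_length, compressed_chain_size)``. For a delta chain
    # snapshot -> delta -> delta with on-disk sizes 1000, 120 and 80 bytes:
    #
    #     rl._chaininfo(rev)  # (2, 1200): two deltas, 1000 + 120 + 80 bytes
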
    def _deltachain(self, rev, stoprev=None):
        return self._inner._deltachain(rev, stoprev=stoprev)

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

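    # [editor's note] Illustrative sketch with hypothetical nodes, not part
    # of the original source: for the linear history n0 <- n1 <- n2, calling
    # findcommonmissing with common=[n1] and heads=[n2] conceptually yields:
    #
    #     has      ~ {nullrev, rev(n0), rev(n1)}  # ::common, inclusive
    #     missing == [n2]                         # (::heads) - (::common)
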
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

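    # [editor's note] Illustrative sketch with hypothetical nodes, not part
    # of the original source: for the graph n0 <- n1 <- n2 plus a side
    # branch n1 <- n3, nodesbetween would conceptually give:
    #
    #     rl.nodesbetween([n1], [n2])
    #     # -> ([n1, n2], [n1], [n2]): the path n1..n2, its root, its head
    #
    # n3 is excluded because it is not an ancestor of the requested head.
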
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

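    # [editor's note] Illustrative sketch, not part of the original source:
    # the extra slot in ``ishead`` absorbs nullrev parents, because
    # ``ishead[nullrev]`` is ``ishead[-1]``. For the hypothetical graph
    # 0 <- 1, 0 <- 2, 2 <- 3 the marking proceeds as:
    #
    #     ishead = [1, 1, 1, 1, _]  # every rev starts as a candidate
    #     # revs 1 and 2 clear their parent 0; rev 3 clears its parent 2
    #     ishead = [0, 1, 0, 1, _]  # heads == [1, 3]
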
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

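    # [editor's note] Illustrative sketch, not part of the original source:
    # the early returns rely on revision numbers being topologically
    # ordered, so an ancestor can never have a larger number than its
    # descendant:
    #
    #     rl.isancestorrev(nullrev, r)  # True for any r
    #     rl.isancestorrev(5, 3)        # False, without any graph walk
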
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass

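    # [editor's note] Illustrative sketch with hypothetical values, not part
    # of the original source: _match() tries, in order, an integer rev, a
    # binary node, a stringified rev, and a full hex nodeid:
    #
    #     rl._match(0)               # node of rev 0
    #     rl._match(b'0')            # same node, via the str(rev) branch
    #     rl._match(b'-1')           # negative revs count back from the tip
    #     rl._match(hex(some_node))  # full-length hex lookup
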
    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if not (id[-1] in hexdigits):
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

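    # [editor's note] Illustrative sketch with hypothetical hashes, not part
    # of the original source: if exactly one node starts with "a1b" while
    # "a1" is shared with another node, then:
    #
    #     rl.shortest(node)               # b'a1b'
    #     rl.shortest(node, minlength=6)  # b'a1b2c3', padded to minlength
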
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self.delta_config.general_delta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        ret = self._inner.issnapshot(rev)
        self.issnapshot = self._inner.issnapshot
        return ret

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._inner._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._inner._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

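    # [editor's note] Illustrative sketch, not part of the original source:
    # when rev2 is already stored as a delta against rev1, revdiff() can
    # return the stored chunk as-is; otherwise it recomputes a binary diff
    # from the two raw texts:
    #
    #     rl.revdiff(rev, rev + 1)  # cheap if rev is rev + 1's delta parent
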
    def revision(self, nodeorrev):
        """return an uncompressed revision of a given node or revision
        number.
        """
        return self._revisiondata(nodeorrev)

    def sidedata(self, nodeorrev):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more
        advanced mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _rawtext(self, node, rev):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """
        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._inner._revisioncache:
            if self._inner._revisioncache[0] == node:
                return (rev, self._inner._revisioncache[2], True)

        if rev is None:
            rev = self.rev(node)

        return self._inner.raw_text(node, rev)

    def _revisiondata(self, nodeorrev, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev)

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._inner._revisioncache = (node, rev, rawtext)

        return text

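    # [editor's note] Illustrative sketch, not part of the original source:
    # for an ordinary revision with no extra flags set, no flag processor
    # runs, so the processed text and the raw text coincide:
    #
    #     rl.revision(rev) == rl.rawdata(rev)  # assuming flags(rev) == 0
    #
    # Flag processors (censoring, external storage, ...) are what can make
    # the two differ.
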
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
-        index_entry = self.index[rev]
-        sidedata_offset = index_entry[8]
-        sidedata_size = index_entry[9]
-
-        if self._inline:
-            sidedata_offset += self.index.entry_size * (1 + rev)
-        if sidedata_size == 0:
-            return {}
-
-        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
-            filename = self._sidedatafile
-            end = self._docket.sidedata_end
-            offset = sidedata_offset
-            length = sidedata_size
-            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
-            raise error.RevlogError(m)
-
-        comp_segment = self._inner._segmentfile_sidedata.read_chunk(
-            sidedata_offset, sidedata_size
-        )
-
-        comp = self.index[rev][11]
-        if comp == COMP_MODE_PLAIN:
-            segment = comp_segment
-        elif comp == COMP_MODE_DEFAULT:
-            segment = self._inner._decompressor(comp_segment)
-        elif comp == COMP_MODE_INLINE:
-            segment = self._inner.decompress(comp_segment)
-        else:
-            msg = b'unknown compression mode %d'
-            msg %= comp
-            raise error.RevlogError(msg)
-
-        sidedata = sidedatautil.deserialize_sidedata(segment)
-        return sidedata
+        sidedata_end = None
+        if self._docket is not None:
+            sidedata_end = self._docket.sidedata_end
+        return self._inner.sidedata(rev, sidedata_end)

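    # [editor's note] Illustrative sketch, not part of the original source:
    # with this changeset, the offset/size lookup, bounds check and
    # decompression all happen inside the inner object; the revlog only
    # resolves the docket's sidedata_end boundary and delegates:
    #
    #     rl.sidedata(rev)  # e.g. {} for a revision without sidedata
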
    def rawdata(self, nodeorrev):
        """return the uncompressed raw data of a given node or revision number."""
        return self._revisiondata(nodeorrev, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if (
                    self._inner._revisioncache
                    and self._inner._revisioncache[0] == node
                ):
                    self._inner._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self.feature_config.censorable and storageutil.iscensoredtext(
                text
            ):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    @property
    def _split_index_file(self):
        """the path at which to expect the index of an ongoing split operation

        The file will only exist if a splitting operation is in progress, but
        it is always expected at the same location."""
        parts = self.radix.split(b'/')
        if len(parts) > 1:
            # append a '-s' suffix to the ``data/`` or ``meta/`` base
            head = parts[0] + b'-s'
            mids = parts[1:-1]
            tail = parts[-1] + b'.i'
            pieces = [head] + mids + [tail]
            return b'/'.join(pieces)
        else:
            # the revlog is stored at the root of the store (changelog or
            # manifest), no risk of collision.
            return self.radix + b'.i.s'

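    # Illustrative sketch (not part of the original file): for a filelog
    # stored under ``data/``, the split index lands in a parallel ``data-s/``
    # tree, e.g.:
    #
    #     radix = b'data/some/dir/file.txt' -> b'data-s/some/dir/file.txt.i'
    #
    # while a root-level revlog such as the changelog maps to:
    #
    #     radix = b'00changelog' -> b'00changelog.i.s'
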
    def _enforceinlinesize(self, tr, side_write=True):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        if self._docket is not None:
            msg = b"inline revlog should not have a docket"
            raise error.ProgrammingError(msg)

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        if troffset:
            tr.addbackup(self._indexfile, for_offset=True)
        tr.add(self._datafile, 0)

        new_index_file_path = None
        if side_write:
            old_index_file_path = self._indexfile
            new_index_file_path = self._split_index_file
            opener = self.opener
            weak_self = weakref.ref(self)

            # the "split" index replaces the real index when the transaction
            # is finalized
            def finalize_callback(tr):
                opener.rename(
                    new_index_file_path,
                    old_index_file_path,
                    checkambig=True,
                )
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path
                    maybe_self._inner.index_file = maybe_self._indexfile

            def abort_callback(tr):
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path
                    maybe_self._inner.inline = True
                    maybe_self._inner.index_file = old_index_file_path

            tr.registertmp(new_index_file_path)
            if self.target[1] is not None:
                callback_id = b'000-revlog-split-%d-%s' % self.target
            else:
                callback_id = b'000-revlog-split-%d' % self.target[0]
            tr.addfinalize(callback_id, finalize_callback)
            tr.addabort(callback_id, abort_callback)

        self._format_flags &= ~FLAG_INLINE_DATA
        self._inner.split_inline(
            tr,
            self._format_flags | self._format_version,
            new_index_file_path=new_index_file_path,
        )

        self._inline = False
        if new_index_file_path is not None:
            self._indexfile = new_index_file_path

        nodemaputil.setup_persistent_nodemap(tr, self)

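    # Illustrative sketch (not part of the original file): an inline revlog
    # interleaves revision data with the index in a single ``.i`` file; once
    # the combined size reaches the module-level ``_maxinline`` threshold,
    # the next ``_enforceinlinesize`` call splits it into separate ``.i``
    # and ``.d`` files:
    #
    #     total_size = rl.start(tiprev) + rl.length(tiprev)
    #     if rl._inline and total_size >= _maxinline:
    #         ...  # conversion happens inside the enclosing transaction
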
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        with self._inner.reading():
            yield

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'trying to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._inner.is_writing:
            yield
        else:
            data_end = None
            sidedata_end = None
            if self._docket is not None:
                data_end = self._docket.data_end
                sidedata_end = self._docket.sidedata_end
            with self._inner.writing(
                transaction,
                data_end=data_end,
                sidedata_end=sidedata_end,
            ):
                yield
            if self._docket is not None:
                self._write_docket(transaction)

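    # Illustrative sketch (not part of the original file): writes are always
    # wrapped in this context manager so that file handles, docket offsets,
    # and transaction hooks stay consistent; re-entering while already
    # writing is a no-op:
    #
    #     with rl._writing(tr):
    #         rl._addrevision(node, rawtext, tr, link, p1, p2, flags, None)
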
    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement its transaction
        logic.

        We could also imagine using the same transaction logic for all
        revlogs since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses
            might use a different hashing method (and override checkhash() in
            such cases)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.feature_config.has_side_data:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

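    # Illustrative sketch (not part of the original file): a typical caller
    # adds a revision under an open transaction and gets the new revision
    # number back; ``tr``, ``linkrev``, ``p1`` and ``p2`` are assumed to come
    # from the surrounding repository logic:
    #
    #     rev = rl.addrevision(b'new file content', tr, linkrev, p1, p2)
    #     node = rl.node(rev)
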
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents

        useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        return self._inner.compress(data)

    def decompress(self, data):
        return self._inner.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be
          set. if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._inner._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._inner._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add
                # on the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self.delta_config.debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        if cachedelta is not None and len(cachedelta) == 2:
            # If the cached delta has no information about how it should be
            # reused, add the default reuse instruction according to the
            # revlog's configuration.
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse = DELTA_BASE_REUSE_NO
            cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.feature_config.has_side_data:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self._inner.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self.feature_config.compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._inner._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

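    # Illustrative sketch (not part of the original file): the "rank" of a
    # revision is the size of its ancestor set, itself included. On a small
    # DAG the branches above give:
    #
    #     r0 (root)           rank(r0) = 1
    #     r1 = child(r0)      rank(r1) = 1 + rank(r0) = 2
    #     r2 = child(r0)      rank(r2) = 2
    #     r3 = merge(r1, r2)  rank(r3) = 1 + rank(r2)
    #                                      + |ancestors(r1) - ancestors(r2)|
    #                                   = 1 + 2 + 1 = 4
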
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.

        Versions < 2 of the revlog can get this in O(1), while revlog v2
        needs a docket file to store that information: since sidedata can be
        rewritten to the end of the data file within a transaction, you can
        have cases where, for example, rev `n` does not have sidedata while
        rev `n - 1` does, leading to `n - 1`'s sidedata being written after
        `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

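    # Illustrative sketch (not part of the original file): following the
    # docstring above, rewriting rev ``n - 1``'s sidedata within the same
    # transaction appends it after rev ``n``'s data, so the real write
    # position must be tracked in the docket rather than derived from the
    # index:
    #
    #     data file: ...[rev n-1 data][rev n data][rev n-1 sidedata, rewritten]
    #                                                                          ^
    #                                                  docket.data_end is here,
    #                                                  past ``self.end(n)``
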
    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._inner._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._inner._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._inner._writinghandles[0]
            wh2 = self._inner._writinghandles[1]
            wh3 = self._inner._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self.delta_config.debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement
                        # in a single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

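    # Illustrative sketch (not part of the original file): each item yielded
    # by ``deltas`` is the 8-tuple unpacked in the loop above:
    #
    #     (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    #
    # with ``delta`` being the binary patch against ``deltabase`` and
    # ``linkmapper`` translating ``linknode`` into a local linkrev.
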
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self.feature_config.censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self.feature_config.censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However, it
            # is not powerful enough at the time of this comment.
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._inner._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._inner._segmentfile.clear_cache()
        self._inner._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

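    # Illustrative sketch (not part of the original file): a verification
    # pass can use the returned tuple to report truncated or padded files;
    # both values are byte counts beyond what the index implies:
    #
    #     dd, di = rl.checksize()
    #     if dd or di:
    #         print('revlog damaged: %d extra data bytes, '
    #               '%d extra index bytes' % (dd, di))
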
    def files(self):
        """return list of files that compose this revlog"""
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self.delta_config.general_delta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self._candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

3562 def clone(
3569 def clone(
3563 self,
3570 self,
3564 tr,
3571 tr,
3565 destrevlog,
3572 destrevlog,
3566 addrevisioncb=None,
3573 addrevisioncb=None,
3567 deltareuse=DELTAREUSESAMEREVS,
3574 deltareuse=DELTAREUSESAMEREVS,
3568 forcedeltabothparents=None,
3575 forcedeltabothparents=None,
3569 sidedata_helpers=None,
3576 sidedata_helpers=None,
3570 ):
3577 ):
3571 """Copy this revlog to another, possibly with format changes.
3578 """Copy this revlog to another, possibly with format changes.
3572
3579
3573 The destination revlog will contain the same revisions and nodes.
3580 The destination revlog will contain the same revisions and nodes.
3574 However, it may not be bit-for-bit identical due to e.g. delta encoding
3581 However, it may not be bit-for-bit identical due to e.g. delta encoding
3575 differences.
3582 differences.
3576
3583
3577 The ``deltareuse`` argument control how deltas from the existing revlog
3584 The ``deltareuse`` argument control how deltas from the existing revlog
3578 are preserved in the destination revlog. The argument can have the
3585 are preserved in the destination revlog. The argument can have the
3579 following values:
3586 following values:
3580
3587
3581 DELTAREUSEALWAYS
3588 DELTAREUSEALWAYS
3582 Deltas will always be reused (if possible), even if the destination
3589 Deltas will always be reused (if possible), even if the destination
3583 revlog would not select the same revisions for the delta. This is the
3590 revlog would not select the same revisions for the delta. This is the
3584 fastest mode of operation.
3591 fastest mode of operation.
3585 DELTAREUSESAMEREVS
3592 DELTAREUSESAMEREVS
3586 Deltas will be reused if the destination revlog would pick the same
3593 Deltas will be reused if the destination revlog would pick the same
3587 revisions for the delta. This mode strikes a balance between speed
3594 revisions for the delta. This mode strikes a balance between speed
3588 and optimization.
3595 and optimization.
3589 DELTAREUSENEVER
3596 DELTAREUSENEVER
3590 Deltas will never be reused. This is the slowest mode of execution.
3597 Deltas will never be reused. This is the slowest mode of execution.
3591 This mode can be used to recompute deltas (e.g. if the diff/delta
3598 This mode can be used to recompute deltas (e.g. if the diff/delta
3592 algorithm changes).
3599 algorithm changes).
3593 DELTAREUSEFULLADD
3600 DELTAREUSEFULLADD
3594 Revision will be re-added as if their were new content. This is
3601 Revision will be re-added as if their were new content. This is
3595 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
3602 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
3596 eg: large file detection and handling.
3603 eg: large file detection and handling.
3597
3604
3598 Delta computation can be slow, so the choice of delta reuse policy can
3605 Delta computation can be slow, so the choice of delta reuse policy can
3599 significantly affect run time.
3606 significantly affect run time.
3600
3607
3601 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3608 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3602 two extremes. Deltas will be reused if they are appropriate. But if the
3609 two extremes. Deltas will be reused if they are appropriate. But if the
3603 delta could choose a better revision, it will do so. This means if you
3610 delta could choose a better revision, it will do so. This means if you
3604 are converting a non-generaldelta revlog to a generaldelta revlog,
3611 are converting a non-generaldelta revlog to a generaldelta revlog,
3605 deltas will be recomputed if the delta's parent isn't a parent of the
3612 deltas will be recomputed if the delta's parent isn't a parent of the
3606 revision.
3613 revision.
3607
3614
3608 In addition to the delta policy, the ``forcedeltabothparents``
3615 In addition to the delta policy, the ``forcedeltabothparents``
3609 argument controls whether to force compute deltas against both parents
3616 argument controls whether to force compute deltas against both parents
3610 for merges. By default, the current default is used.
3617 for merges. By default, the current default is used.
3611
3618
3612 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3619 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3613 `sidedata_helpers`.
3620 `sidedata_helpers`.
3614 """
3621 """
3615 if deltareuse not in self.DELTAREUSEALL:
3622 if deltareuse not in self.DELTAREUSEALL:
3616 raise ValueError(
3623 raise ValueError(
3617 _(b'value for deltareuse invalid: %s') % deltareuse
3624 _(b'value for deltareuse invalid: %s') % deltareuse
3618 )
3625 )
3619
3626
3620 if len(destrevlog):
3627 if len(destrevlog):
3621 raise ValueError(_(b'destination revlog is not empty'))
3628 raise ValueError(_(b'destination revlog is not empty'))
3622
3629
3623 if getattr(self, 'filteredrevs', None):
3630 if getattr(self, 'filteredrevs', None):
3624 raise ValueError(_(b'source revlog has filtered revisions'))
3631 raise ValueError(_(b'source revlog has filtered revisions'))
3625 if getattr(destrevlog, 'filteredrevs', None):
3632 if getattr(destrevlog, 'filteredrevs', None):
3626 raise ValueError(_(b'destination revlog has filtered revisions'))
3633 raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazy_delta and lazy_delta_base control whether to reuse a cached
        # delta, if possible.
        old_delta_config = destrevlog.delta_config
        destrevlog.delta_config = destrevlog.delta_config.copy()

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog.delta_config.lazy_delta_base = True
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = False
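            # Note: DELTAREUSEFULLADD needs no lazy-delta tweak here; _clone
            # below handles it by re-adding each revision as full content.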

            delta_both_parents = (
                forcedeltabothparents or old_delta_config.delta_both_parents
            )
            destrevlog.delta_config.delta_both_parents = delta_both_parents

            with self.reading(), destrevlog._writing(tr):
                self._clone(
                    tr,
                    destrevlog,
                    addrevisioncb,
                    deltareuse,
                    forcedeltabothparents,
                    sidedata_helpers,
                )

        finally:
            destrevlog.delta_config = old_delta_config

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self.delta_config.debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]
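            # For reference (index layout as used above): entry[0] packs the
            # data offset in its high bits and the 16 storage flags in its
            # low 16 bits, entry[4] is the linkrev, entry[5]/entry[6] are the
            # parent revisions, and entry[7] is the node id.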

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]
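                    # Precedence note for the line above: `&` binds tighter
                    # than `|`, so this is flags | (new_flags[0] &
                    # ~new_flags[1]), i.e. OR in the flags to add, except
                    # those also marked for removal.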

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog.delta_config.lazy_delta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._inner._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    try:
                        rawtext = self._revisiondata(rev)
                    except error.CensoredNodeError as censored:
                        assert flags & REVIDX_ISCENSORED
                        rawtext = censored.tombstone
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]
3739
3746
3740 destrevlog._addrevision(
3747 destrevlog._addrevision(
3741 node,
3748 node,
3742 rawtext,
3749 rawtext,
3743 tr,
3750 tr,
3744 linkrev,
3751 linkrev,
3745 p1,
3752 p1,
3746 p2,
3753 p2,
3747 flags,
3754 flags,
3748 cachedelta,
3755 cachedelta,
3749 deltacomputer=deltacomputer,
3756 deltacomputer=deltacomputer,
3750 sidedata=sidedata,
3757 sidedata=sidedata,
3751 )
3758 )
3752
3759
3753 if addrevisioncb:
3760 if addrevisioncb:
3754 addrevisioncb(self, rev, node)
3761 addrevisioncb(self, rev, node)
3755
3762
    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()             | 0      | 0      | 0     | not 0
            #  renamed()           | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n'| False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common  | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1      | L1     | L1    | L1
            # size()       | L1      | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2      | L2     | L2    | L2
            # len(text)    | L2      | L2     | L2    | L3
            # len(read())  | L2      | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            # 1. length check: L1 == L2, in all cases.
            # 2. hash check: depending on flag processor, we may need to
            #    use either "text" (external), or "rawtext" (in revlog).
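            # Illustrative example (not from the original code): a rawtext
            # carrying rename metadata looks like
            #
            #     b'\x01\ncopy: old/path\ncopyrev: <40 hex chars>\n\x01\n' + text
            #
            # so LM counts everything up to and including the second
            # b'\x01\n' marker, and len(read()) == L2 - LM.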

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
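        # Usage sketch (illustrative only; `rl` is an assumed open revlog):
        # callers request just the facts they need, e.g.
        #
        #     info = rl.storageinfo(revisionscount=True, storedsize=True)
        #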
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.feature_config.has_side_data:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return
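        # `helpers` appears to be the (repo, computers, removers) triple
        # described in `revlogutil.sidedata.get_sidedata_helpers`, hence the
        # emptiness check above.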

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._inner._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.feature_config.has_side_data:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self._inner.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
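                # Assumed semantics of the modes above: COMP_MODE_PLAIN
                # stores the bytes as-is, COMP_MODE_DEFAULT omits the header
                # and relies on the docket's default compression, and
                # COMP_MODE_INLINE keeps the compression header inline with
                # the chunk.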
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)