revlog: consolidate cache invalidation within the inner objet...
marmoute - r51994:045b5f74 default
@@ -1,4041 +1,4042 b''
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import io
import os
import struct
import weakref
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072


# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance consideration (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if hasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'


class _Config:
    def copy(self):
        return self.__class__(**self.__dict__)


@attr.s()
class FeatureConfig(_Config):
    """Hold configuration values about the available revlog features"""

    # the default compression engine
    compression_engine = attr.ib(default=b'zlib')
    # compression engines options
    compression_engine_options = attr.ib(default=attr.Factory(dict))

    # can we use censor on this revlog
    censorable = attr.ib(default=False)
    # does this revlog use the "side data" feature
    has_side_data = attr.ib(default=False)
    # might remove rank configuration once the computation has no impact
    compute_rank = attr.ib(default=False)
    # parent order is supposed to be semantically irrelevant, so we
    # normally resort parents to ensure that the first parent is non-null,
    # if there is a non-null parent at all.
    # filelog abuses the parent order as flag to mark some instances of
    # meta-encoded files, so allow it to disable this behavior.
    canonical_parent_order = attr.ib(default=False)
    # can ellipsis commit be used
    enable_ellipsis = attr.ib(default=False)

    def copy(self):
        new = super().copy()
        new.compression_engine_options = self.compression_engine_options.copy()
        return new


@attr.s()
class DataConfig(_Config):
    """Hold configuration values about how the revlog data are read"""

    # should we try to open the "pending" version of the revlog
    try_pending = attr.ib(default=False)
    # should we try to open the "splitted" version of the revlog
    try_split = attr.ib(default=False)
    # When True, indexfile should be opened with checkambig=True at writing,
    # to avoid file stat ambiguity.
    check_ambig = attr.ib(default=False)

    # If true, use mmap instead of reading to deal with large index
    mmap_large_index = attr.ib(default=False)
    # how much data is large
    mmap_index_threshold = attr.ib(default=None)
    # How much data to read and cache into the raw revlog data cache.
    chunk_cache_size = attr.ib(default=65536)

    # Allow sparse reading of the revlog data
    with_sparse_read = attr.ib(default=False)
    # minimal density of a sparse read chunk
    sr_density_threshold = attr.ib(default=0.50)
    # minimal size of data we skip when performing sparse read
    sr_min_gap_size = attr.ib(default=262144)

    # are deltas encoded against arbitrary bases.
    generaldelta = attr.ib(default=False)


@attr.s()
class DeltaConfig(_Config):
    """Hold configuration values about how new deltas are computed

    Some attributes are duplicated from DataConfig to help keep each object
    self-contained.
    """

    # can deltas be encoded against arbitrary bases.
    general_delta = attr.ib(default=False)
    # Allow sparse writing of the revlog data
    sparse_revlog = attr.ib(default=False)
    # maximum length of a delta chain
    max_chain_len = attr.ib(default=None)
    # Maximum distance between delta chain base start and end
    max_deltachain_span = attr.ib(default=-1)
    # If `upper_bound_comp` is not None, this is the expected maximal gain from
    # compression for the data content.
    upper_bound_comp = attr.ib(default=None)
    # Should we try a delta against both parents
    delta_both_parents = attr.ib(default=True)
    # Test delta base candidate groups by chunks of this maximal size.
    candidate_group_chunk_size = attr.ib(default=0)
    # Should we display debug information about delta computation
    debug_delta = attr.ib(default=False)
    # trust incoming deltas by default
    lazy_delta = attr.ib(default=True)
    # trust the base of incoming deltas by default
    lazy_delta_base = attr.ib(default=False)

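# Illustrative sketch (editor's aside, not part of this changeset): the three
# config classes above are plain attrs containers, so wiring one up for an
# experiment looks roughly like this, where every value shown is an assumed
# example rather than a recommended setting:
#
#     delta_config = DeltaConfig(general_delta=True, sparse_revlog=True)
#     tweaked = delta_config.copy()  # _Config.copy() re-inits from __dict__
#     tweaked.max_chain_len = 1000   # the original object is left untouched
#
# FeatureConfig overrides copy() so the shared compression_engine_options
# dict is duplicated too, instead of being aliased between copies.
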
class _InnerRevlog:
    """An inner layer of the revlog object

    That layer exists to be able to delegate some operations to Rust, its
    boundaries are arbitrary and based on what we can delegate to Rust.
    """

    def __init__(
        self,
        opener,
        index,
        index_file,
        data_file,
        sidedata_file,
        inline,
        data_config,
        delta_config,
        feature_config,
        chunk_cache,
        default_compression_header,
    ):
        self.opener = opener
        self.index = index

        self.__index_file = index_file
        self.data_file = data_file
        self.sidedata_file = sidedata_file
        self.inline = inline
        self.data_config = data_config
        self.delta_config = delta_config
        self.feature_config = feature_config

        self._default_compression_header = default_compression_header

        # index

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self.index_file if self.inline else self.data_file),
            self.data_config.chunk_cache_size,
            chunk_cache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self.sidedata_file,
            self.data_config.chunk_cache_size,
        )

        # revlog header -> revlog compressor
        self._decompressors = {}
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None

    @property
    def index_file(self):
        return self.__index_file

    @index_file.setter
    def index_file(self, new_index_file):
        self.__index_file = new_index_file
        if self.inline:
            self._segmentfile.filename = new_index_file

    def __len__(self):
        return len(self.index)

+    def clear_cache(self):
+        self._revisioncache = None
+        self._segmentfile.clear_cache()
+        self._segmentfile_sidedata.clear_cache()
+
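    # Illustrative sketch (editor's aside, not part of this changeset): the
    # new method gives callers a single entry point to drop every cache the
    # inner object owns, e.g.:
    #
    #     inner.clear_cache()  # drops the (node, rev, rawtext) revision cache
    #                          # and both segment-file chunk caches at once
    #
    # instead of invalidating self._revisioncache and the two segment files
    # separately, which is the consolidation this changeset is about.
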
    # Derived from index values.

    def start(self, rev):
        """the offset of the data chunk for this revision"""
        return int(self.index[rev][0] >> 16)

    def length(self, rev):
        """the length of the data chunk for this revision"""
        return self.index[rev][1]

    def end(self, rev):
        """the end of the data chunk for this revision"""
        return self.start(rev) + self.length(rev)

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self.delta_config.general_delta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self.delta_config.sparse_revlog:
            return self.deltaparent(rev) == nullrev
        elif hasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        while self.length(p1) == 0:
            b = self.deltaparent(p1)
            if b == p1:
                break
            p1 = b
        p2 = entry[6]
        while self.length(p2) == 0:
            b = self.deltaparent(p2)
            if b == p2:
                break
            p2 = b
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

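    # Illustrative example (editor's aside, not part of this changeset): in a
    # sparse revlog a snapshot is a revision whose delta base is not one of
    # its parents; full texts (base == nullrev) are snapshots, and so is a
    # delta stored against another snapshot:
    #
    #     rev 0: full text, base == nullrev        -> snapshot
    #     rev 1: delta against rev 0, p1 == rev 0  -> not a snapshot
    #     rev 5: delta against rev 0, p1 == rev 4  -> snapshot (recursively,
    #                                                 since rev 0 is one)
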
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        generaldelta = self.delta_config.general_delta
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

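    # Illustrative sketch (editor's aside, not part of this changeset): the
    # (chain, stopped) contract is what raw_text() below builds on. Assuming
    # a full text is already cached for ``cachedrev``:
    #
    #     chain, stopped = self._deltachain(rev, stoprev=cachedrev)
    #     # stopped -> chain holds deltas only; patching starts from the cache
    #     # not stopped -> chain[0] is the base revision holding a full text
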
    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self.feature_config.compression_engine]
        return engine.revlogcompressor(
            self.feature_config.compression_engine_options
        )

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._default_compression_header is None:
            return None
        t = self._default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(
                    self.feature_config.compression_engine_options
                )
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

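    # Illustrative examples (editor's aside, not part of this changeset): the
    # (header, data) pairs returned by compress() match the one-byte tags that
    # decompress() below dispatches on; with the default zlib engine, roughly:
    #
    #     compress(b'')          -> (b'',  b'')          # empty stays empty
    #     compress(b'x' * 4096)  -> (b'',  b'x\x9c...')  # 'x' header embedded
    #     compress(b'\0abc')     -> (b'',  b'\0abc')     # NUL-led raw data
    #     compress(b'abc')       -> (b'u', b'abc')       # too small to shrink
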
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        if len(self.index) == 0:
            yield  # nothing to be read
        else:
            with self._segmentfile.reading():
                with self._segmentfile_sidedata.reading():
                    yield

    @property
    def is_writing(self):
        """True if a writing context is open"""
        return self._writinghandles is not None

    @contextlib.contextmanager
    def writing(self, transaction, data_end=None, sidedata_end=None):
        """Open the revlog files for writing

        Adding content to a revlog should be done within such a context.
        """
        if self.is_writing:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self.index)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self.inline:
                    try:
                        dfh = self.opener(self.data_file, mode=b"r+")
                        if data_end is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self.opener(self.data_file, mode=b"w+")
                        transaction.add(self.data_file, dsize)
                if self.sidedata_file is not None:
                    assert sidedata_end is not None
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self.sidedata_file, mode=b"r+")
                        dfh.seek(sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self.sidedata_file, mode=b"w+")
                        transaction.add(self.sidedata_file, sidedata_end)

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self.inline:
                    transaction.add(self.index_file, dsize + isize)
                else:
                    transaction.add(self.index_file, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self.inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing referent to
                # potential unflushed data content.
                if ifh is not None:
                    ifh.close()

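    # Illustrative sketch (editor's aside, not part of this changeset): all
    # appends must happen inside the context manager above; write_entry()
    # below raises a ProgrammingError otherwise. Assuming ``tr`` is an open
    # transaction:
    #
    #     with inner.writing(tr):
    #         ...  # write_entry() calls go here
    #     # on exit, the handles are closed (index file last)
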
    def __index_write_fp(self, index_end=None):
        """internal method to open the index file for writing

        You should not use this directly; use `_writing` instead
        """
        try:
            f = self.opener(
                self.index_file,
                mode=b"r+",
                checkambig=self.data_config.check_ambig,
            )
            if index_end is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(index_end, os.SEEK_SET)
            return f
        except FileNotFoundError:
            return self.opener(
                self.index_file,
                mode=b"w+",
                checkambig=self.data_config.check_ambig,
            )

    def __index_new_fp(self):
        """internal method to create a new index file for writing

        You should not use this unless you are upgrading from inline revlog
        """
        return self.opener(
            self.index_file,
            mode=b"w",
            checkambig=self.data_config.check_ambig,
            atomictemp=True,
        )

    def split_inline(self, tr, header, new_index_file_path=None):
        """split the data of an inline revlog into an index and a data file"""
        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with sidedata writing handle as it is only
            # relevant with revlog-v2 which is never inline, not reaching
            # this code

        new_dfh = self.opener(self.data_file, mode=b"w+")
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self.reading():
                for r in range(len(self.index)):
                    new_dfh.write(self.get_segment_for_revs(r, r)[1])
                new_dfh.flush()

            if new_index_file_path is not None:
                self.index_file = new_index_file_path
            with self.__index_new_fp() as fp:
                self.inline = False
                for i in range(len(self.index)):
                    e = self.index.entry_binary(i)
                    if i == 0:
                        packed_header = self.index.pack_header(header)
                        e = packed_header + e
                    fp.write(e)

                # If we don't use side-write, the temp file replaces the real
                # index when we exit the context manager

            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self.data_file,
                self.data_config.chunk_cache_size,
            )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with sidedata writing handle as it is only
                # relevant with revlog-v2 which is never inline, not reaching
                # this code
        finally:
            if new_dfh is not None:
                new_dfh.close()
        return self.index_file

    def get_segment_for_revs(self, startrev, endrev):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.

        API: we should consider making this a private part of the InnerRevlog
        at some point.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self.inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length)

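    # Illustrative worked example (editor's aside, not part of this
    # changeset): in an inline revlog, index entries and data chunks are
    # interleaved in one file, so the data of revision ``rev`` sits
    # (rev + 1) index entries past its nominal offset. With a 64-byte
    # entry_size and rev 2 recorded at data offset 100:
    #
    #     physical start = 100 + (2 + 1) * 64 = 292
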
    def _chunk(self, rev):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self.get_segment_for_revs(rev, rev)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self.inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self.data_config.with_sparse_read:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self,
                revs,
                targetsize=targetsize,
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self.get_segment_for_revs(firstrev, lastrev)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

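    # Illustrative sketch (editor's aside, not part of this changeset): with
    # sparse reads enabled, deltautil.slicechunk() cuts the revision range so
    # that each slice stays above sr_density_threshold and large gaps between
    # needed chunks (>= sr_min_gap_size) are skipped instead of read, e.g.:
    #
    #     revs 0..99 with live data only around 0..9 and 90..99
    #     -> two slices, [0..9] and [90..99], one segment read each
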
    def raw_text(self, node, rev):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            cachedrev = self._revisioncache[1]

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._inner._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

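    # Illustrative sketch (editor's aside, not part of this changeset): the
    # ``validated`` flag is always False here because the hash has not been
    # checked yet; a caller is expected to do roughly:
    #
    #     rev, rawtext, validated = inner.raw_text(node, rev)
    #     ...  # verify rawtext against ``node``
    #     inner._revisioncache = (node, rev, rawtext)  # re-prime the cache
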
    def sidedata(self, rev, sidedata_end):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self.inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if sidedata_end < sidedata_offset + sidedata_size:
            filename = self.sidedata_file
            end = sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

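    # Illustrative note (not part of the original source): the three
    # COMP_MODE_* values above dispatch decompression as follows --
    #
    #   COMP_MODE_PLAIN:   stored verbatim, no decompression needed
    #   COMP_MODE_DEFAULT: use the revlog-wide default decompressor
    #   COMP_MODE_INLINE:  the chunk carries its own compression header,
    #                      so self.decompress() inspects it per chunk
    #
    # e.g. an inline-mode chunk starting with b'x' is zlib data, one starting
    # with b'u' is an explicit "stored uncompressed" marker.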
    def write_entry(
        self,
        transaction,
        entry,
        data,
        link,
        offset,
        sidedata,
        sidedata_offset,
        index_end,
        data_end,
        sidedata_end,
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if index_end is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(index_end, os.SEEK_SET)
        if dfh:
            if data_end is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(sidedata_end, os.SEEK_SET)

        curr = len(self.index) - 1
        if not self.inline:
            transaction.add(self.data_file, offset)
            if self.sidedata_file:
                transaction.add(self.sidedata_file, sidedata_offset)
            transaction.add(self.index_file, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self.index_file, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
        return (
            ifh.tell(),
            dfh.tell() if dfh else None,
            sdfh.tell() if sdfh else None,
        )


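# Illustrative note (not part of the original source): write_entry appends to
# up to three files (index, data, sidedata) and reports the new end-of-file
# positions so the caller can thread them back in.  A hedged sketch of the
# calling pattern, with hypothetical names:
#
#   index_end = data_end = sidedata_end = None
#   for entry, data, sidedata in revisions_to_store:
#       index_end, data_end, sidedata_end = inner.write_entry(
#           tr, entry, data, link, offset, sidedata,
#           sidedata_offset, index_end, data_end, sidedata_end,
#       )
#
# Passing the previous `*_end` values back in is what lets the seek()
# workaround above position each handle explicitly between writes.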
class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

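    # Illustrative note (not part of the original source): the first field of
    # an index entry packs the data-file offset and the per-revision flags
    # into one 64-bit integer -- offset in the upper 48 bits, flags in the
    # lower 16 (see the start()/flags() accessors further down):
    #
    #   packed = (offset << 16) | flags
    #   offset = packed >> 16
    #   flags = packed & 0xFFFF
    #
    # so an entry at data offset 1024 with no flags stores 1024 << 16.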
    _flagserrorclass = error.RevlogError

    @staticmethod
    def is_inline_index(header_bytes):
        """Determine if a revlog is inline from the initial bytes of the index"""
        header = INDEX_HEADER.unpack(header_bytes)[0]

        _format_flags = header & ~0xFFFF
        _format_version = header & 0xFFFF

        features = FEATURES_BY_VERSION[_format_version]
        return features[b'inline'](_format_flags)

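    # Illustrative note (not part of the original source): the 4-byte index
    # header mixes the format version (low 16 bits) with feature flags (high
    # 16 bits).  Assuming FLAG_INLINE_DATA = 1 << 16 and FLAG_GENERALDELTA =
    # 1 << 17 as defined in revlogutils.constants, a typical inline,
    # general-delta revlogv1 header works out as:
    #
    #   header = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA  # 0x00030001
    #   header & 0xFFFF   -> 1        (format version, REVLOGV1)
    #   header & ~0xFFFF  -> 0x30000  (feature flags)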
    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        try_split=False,
        canonical_parent_order=True,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note: this must be reliably set by normal code,
        but test, debug, or performance measurement code might not set it to
        an accurate value.
        """

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self._try_split = try_split
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        if b'feature-config' in self.opener.options:
            self.feature_config = self.opener.options[b'feature-config'].copy()
        else:
            self.feature_config = FeatureConfig()
        self.feature_config.censorable = censorable
        self.feature_config.canonical_parent_order = canonical_parent_order
        if b'data-config' in self.opener.options:
            self.data_config = self.opener.options[b'data-config'].copy()
        else:
            self.data_config = DataConfig()
        self.data_config.check_ambig = checkambig
        self.data_config.mmap_large_index = mmaplargeindex
        if b'delta-config' in self.opener.options:
            self.delta_config = self.opener.options[b'delta-config'].copy()
        else:
            self.delta_config = DeltaConfig()
        self.delta_config.upper_bound_comp = upperboundcomp

        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)

        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}

        # other optional features

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)
        # prevent nesting of addgroup
        self._adding_group = None

        chunk_cache = self._loadindex()
        self._load_inner(chunk_cache)
        self._concurrencychecker = concurrencychecker

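    # Illustrative note (not part of the original source): a hedged
    # construction sketch.  The real opener comes from the repository's vfs;
    # the path and kind below are illustrative only:
    #
    #   rl = revlog(
    #       opener,                        # a vfs-style callable with .options
    #       (KIND_FILELOG, b'foo.txt'),    # the (KIND, ID) target tuple
    #       b'data/foo.txt',               # radix: index is <radix>.i
    #   )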
    @property
    def _generaldelta(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
        )
        return self.delta_config.general_delta

    @property
    def _checkambig(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
        )
        return self.data_config.check_ambig

    @property
    def _mmaplargeindex(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
        )
        return self.data_config.mmap_large_index

    @property
    def _censorable(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
        )
        return self.feature_config.censorable

    @property
    def _chunkcachesize(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
        )
        return self.data_config.chunk_cache_size

    @property
    def _maxchainlen(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
        )
        return self.delta_config.max_chain_len

    @property
    def _deltabothparents(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
        )
        return self.delta_config.delta_both_parents

    @property
    def _candidate_group_chunk_size(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.candidate_group_chunk_size",
            b"6.6",
            stacklevel=2,
        )
        return self.delta_config.candidate_group_chunk_size

    @property
    def _debug_delta(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
        )
        return self.delta_config.debug_delta

    @property
    def _compengine(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.compression_engine",
            b"6.6",
            stacklevel=2,
        )
        return self.feature_config.compression_engine

    @property
    def upperboundcomp(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.upper_bound_comp",
            b"6.6",
            stacklevel=2,
        )
        return self.delta_config.upper_bound_comp

    @property
    def _compengineopts(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.compression_engine_options",
            b"6.6",
            stacklevel=2,
        )
        return self.feature_config.compression_engine_options

    @property
    def _maxdeltachainspan(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
        )
        return self.delta_config.max_deltachain_span

    @property
    def _withsparseread(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
        )
        return self.data_config.with_sparse_read

    @property
    def _sparserevlog(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
        )
        return self.delta_config.sparse_revlog

    @property
    def hassidedata(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
        )
        return self.feature_config.has_side_data

    @property
    def _srdensitythreshold(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.sr_density_threshold",
            b"6.6",
            stacklevel=2,
        )
        return self.data_config.sr_density_threshold

    @property
    def _srmingapsize(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
        )
        return self.data_config.sr_min_gap_size

    @property
    def _compute_rank(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
        )
        return self.feature_config.compute_rank

    @property
    def canonical_parent_order(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.canonical_parent_order",
            b"6.6",
            stacklevel=2,
        )
        return self.feature_config.canonical_parent_order

    @property
    def _lazydelta(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
        )
        return self.delta_config.lazy_delta

    @property
    def _lazydeltabase(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
        )
        return self.delta_config.lazy_delta_base

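    # Illustrative note (not part of the original source): every proxy above
    # only forwards to a config object while emitting a deprecation warning,
    # e.g. new code reads `rl.delta_config.general_delta` instead of the old
    # `rl._generaldelta` attribute, and likewise for the data_config and
    # feature_config fields named in each warning message.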
    def _init_opts(self):
        """process options (from above/config) to set up the associated default revlog mode

        These values might be affected when actually reading the on-disk
        information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
            compute_rank = opts.get(b'changelogv2.compute-rank', True)
            self.feature_config.compute_rank = compute_rank
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        mmapindexthreshold = None
        if self.data_config.mmap_large_index:
            mmapindexthreshold = self.data_config.mmap_index_threshold
        if self.feature_config.enable_ellipsis:
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        chunk_cache_size = self.data_config.chunk_cache_size
        if chunk_cache_size <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % chunk_cache_size
            )
        elif chunk_cache_size & (chunk_cache_size - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % chunk_cache_size
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

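    # Illustrative note (not part of the original source): the
    # `n & (n - 1)` test above is the classic power-of-two check -- a power
    # of two has exactly one bit set, so clearing its lowest set bit yields
    # zero:
    #
    #   65536 & 65535 == 0        # 0x10000 & 0x0FFFF -> power of two, ok
    #   65537 & 65536 == 65536    # nonzero -> rejected
    #
    # Power-of-two chunk cache sizes keep the chunk-aligned arithmetic in the
    # cache down to cheap masks and shifts.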
    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''

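    # Illustrative note (not part of the original source): clamping `size` to
    # the real file size before mmap matters because mapping past end-of-file
    # and then touching those pages can crash the process (SIGBUS) rather
    # than raise a Python exception; a plain read() would simply return
    # fewer bytes.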
    def get_streams(self, max_linkrev, force_inline=False):
        """return a list of streams that represent this revlog

        This is used by stream-clone to do byte-for-byte copies of a repository.

        This streams data for all revisions that refer to a changelog revision up
        to `max_linkrev`.

        If `force_inline` is set, it enforces that the stream will represent an inline revlog.

        It returns a list of three-tuples:

            [
                (filename, bytes_stream, stream_size),
                …
            ]
        """
        n = len(self)
        index = self.index
        while n > 0:
            linkrev = index[n - 1][4]
            if linkrev < max_linkrev:
                break
            # note: this loop will rarely go through multiple iterations, since
            # it only traverses commits created during the current streaming
            # pull operation.
            #
            # If this becomes a problem, using a binary search should cap the
            # runtime of this.
            n = n - 1
        if n == 0:
            # no data to send
            return []
        index_size = n * index.entry_size
        data_size = self.end(n - 1)

        # XXX we might have been split (or stripped) since the object was
        # initialized. We need to close this race too, e.g. by having a way
        # to pre-open the files we feed to the revlog and never closing them
        # before we are done streaming.

        if self._inline:

            def get_stream():
                with self.opener(self._indexfile, mode=b"r") as fp:
                    yield None
                    size = index_size + data_size
                    if size <= 65536:
                        yield fp.read(size)
                    else:
                        yield from util.filechunkiter(fp, limit=size)

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        elif force_inline:

            def get_stream():
                with self.reading():
                    yield None

                    for rev in range(n):
                        idx = self.index.entry_binary(rev)
                        if rev == 0 and self._docket is None:
                            # re-inject the inline flag
                            header = self._format_flags
                            header |= self._format_version
                            header |= FLAG_INLINE_DATA
                            header = self.index.pack_header(header)
                            idx = header + idx
                        yield idx
                        yield self._inner.get_segment_for_revs(rev, rev)[1]

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        else:

            def get_index_stream():
                with self.opener(self._indexfile, mode=b"r") as fp:
                    yield None
                    if index_size <= 65536:
                        yield fp.read(index_size)
                    else:
                        yield from util.filechunkiter(fp, limit=index_size)

            def get_data_stream():
                with self._datafp() as fp:
                    yield None
                    if data_size <= 65536:
                        yield fp.read(data_size)
                    else:
                        yield from util.filechunkiter(fp, limit=data_size)

            index_stream = get_index_stream()
            next(index_stream)
            data_stream = get_data_stream()
            next(data_stream)
            return [
                (self._datafile, data_stream, data_size),
                (self._indexfile, index_stream, index_size),
            ]

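    # Illustrative note (not part of the original source): the `yield None` /
    # `next(stream)` dance primes each generator so the files are opened
    # before the caller starts draining the stream.  A hedged consumer
    # sketch, with a hypothetical `open_output` helper:
    #
    #   for filename, stream, size in rl.get_streams(max_linkrev):
    #       out = open_output(filename)
    #       for chunk in stream:
    #           out.write(chunk)       # exactly `size` bytes in total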
    def _loadindex(self, docket=None):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        elif self._try_split and self.opener.exists(self._split_index_file):
            entry_point = self._split_index_file
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self.delta_config.general_delta = features[b'generaldelta'](
                self._format_flags
            )
            self.feature_config.has_side_data = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self.delta_config.general_delta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self.delta_config.general_delta:
            self.delta_config.sparse_revlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and hasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)

        return chunkcache

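    # Illustrative note (not part of the original source): _loadindex leaves
    # the revlog with a parsed `self.index` plus the raw chunk cache it
    # returns, and _load_inner (below) hands both to the _InnerRevlog that
    # owns the file I/O.  The nodemap check above only reuses the persisted
    # nodemap when the docket's recorded tip still matches the index, i.e.:
    #
    #   index[docket.tip_rev][7] == docket.tip_node   # field 7 is the nodeid
    #
    # otherwise the on-disk nodemap is ignored and rebuilt later.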
    def _load_inner(self, chunk_cache):
        if self._docket is None:
            default_compression_header = None
        else:
            default_compression_header = self._docket.default_compression_header

        self._inner = _InnerRevlog(
            opener=self.opener,
            index=self.index,
            index_file=self._indexfile,
            data_file=self._datafile,
            sidedata_file=self._sidedatafile,
            inline=self._inline,
            data_config=self.data_config,
            delta_config=self.delta_config,
            feature_config=self.feature_config,
            chunk_cache=chunk_cache,
            default_compression_header=default_compression_header,
        )

    def get_revlog(self):
        """simple function to mirror the API of other not-really-revlog objects"""
        return self

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        if self.revlog_kind == KIND_FILELOG:
            # Reference the file without the "data/" prefix, so it is familiar
            # to the user.
            return self.target[1]
        else:
            return self.radix

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def _candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

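    # Illustrative note (not part of the original source):
    # REVIDX_RAWTEXT_CHANGING_FLAGS is a bitmask, so the test above is a
    # plain flag intersection:
    #
    #   if self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
    #       ...  # some flag processor may rewrite this rev's rawtext
    #
    # e.g. an LFS-tracked revision stores a pointer as its rawtext, so a
    # delta computed against it would not match the expanded content.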
    def update_caches(self, transaction):
        """update on disk cache

        If a transaction is passed, the update may be delayed to transaction
        commit."""
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

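    # Illustrative note (not part of the original source): the hunk below is
    # the change this commit makes -- the per-attribute pokes into the inner
    # object (`_revisioncache` and the two segment-file caches) are replaced
    # by a single `self._inner.clear_cache()` call, so the inner revlog owns
    # its own cache invalidation.  Removed lines are marked `-`, the added
    # line `+`; unmarked lines are unchanged context.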
1808 def clearcaches(self):
1813 def clearcaches(self):
1809 """Clear in-memory caches"""
1814 """Clear in-memory caches"""
1810 self._inner._revisioncache = None
1811 self._chainbasecache.clear()
1815 self._chainbasecache.clear()
1812 self._inner._segmentfile.clear_cache()
1816 self._inner.clear_cache()
1813 self._inner._segmentfile_sidedata.clear_cache()
1814 self._pcache = {}
1817 self._pcache = {}
1815 self._nodemap_docket = None
1818 self._nodemap_docket = None
1816 self.index.clearcaches()
1819 self.index.clearcaches()
1817 # The python code is the one responsible for validating the docket, we
1820 # The python code is the one responsible for validating the docket, we
1818 # end up having to refresh it here.
1821 # end up having to refresh it here.
1819 use_nodemap = (
1822 use_nodemap = (
1820 not self._inline
1823 not self._inline
1821 and self._nodemap_file is not None
1824 and self._nodemap_file is not None
1822 and hasattr(self.index, 'update_nodemap_data')
1825 and hasattr(self.index, 'update_nodemap_data')
1823 )
1826 )
1824 if use_nodemap:
1827 if use_nodemap:
1825 nodemap_data = nodemaputil.persisted_data(self)
1828 nodemap_data = nodemaputil.persisted_data(self)
1826 if nodemap_data is not None:
1829 if nodemap_data is not None:
1827 self._nodemap_docket = nodemap_data[0]
1830 self._nodemap_docket = nodemap_data[0]
1828 self.index.update_nodemap_data(*nodemap_data)
1831 self.index.update_nodemap_data(*nodemap_data)
1829
1832
    def rev(self, node):
        """return the revision number associated with a <nodeid>"""
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset (instead of previous-offset +
        # previous-size).
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.feature_config.has_side_data:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

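    # Sketch of the size()/rawsize() contrast above (hypothetical `rl` and
    # `some_rev`): rawsize() answers from the index whenever the stored
    # length is non-negative, while size() may have to run flag processors
    # and materialize the full revision:
    #
    #     raw = rl.rawsize(some_rev)    # usually O(1), straight from index
    #     cooked = rl.size(some_rev)    # may call rl.revision(some_rev)
    #     if rl.flags(some_rev) == 0:
    #         assert cooked == raw      # fast path: no processor can run
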
    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank

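    # Hedged sketch of the rank contract (hypothetical `rl` and `some_rev`):
    # when persisted, the rank equals the size of the inclusive ancestor
    # set, so it can cross-check a slow explicit computation:
    #
    #     rank = rl.fast_rank(some_rev)
    #     if rank is not None:
    #         slow = len(list(rl.ancestors([some_rev], inclusive=True)))
    #         assert rank == slow
    #     # None means the format (non-CHANGELOGV2) does not persist ranks.
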
    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        if self.feature_config.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead; d[5] is a revision
        # number, so the null check must be against nullrev (as in
        # parentrevs above)
        if self.feature_config.canonical_parent_order and d[5] == nullrev:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self.delta_config.general_delta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

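    # What _chaininfo() measures, as a sketch (hypothetical `rl` and
    # `some_rev`): the first element counts deltas to apply on top of the
    # chain base, the second sums the compressed chunk sizes to be read:
    #
    #     clen, compressed = rl._chaininfo(some_rev)
    #     assert clen == rl.chainlen(some_rev)
    #     assert compressed >= rl.length(some_rev)  # includes its own chunk
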
    def _deltachain(self, rev, stoprev=None):
        return self._inner._deltachain(rev, stoprev=stoprev)

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

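    # Usage sketch (hypothetical `rl`, `tip_rev`, `cutoff_rev`, and a
    # hypothetical `found()` predicate): ancestors() is lazy and yields
    # revision numbers in reverse revision order, so callers can bail out
    # early without paying for the whole traversal:
    #
    #     for a in rl.ancestors([tip_rev], stoprev=cutoff_rev):
    #         if found(a):
    #             break   # remaining ancestors are never computed
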
    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

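    # Discovery-style sketch (hypothetical `rl`, `common_rev`, `head_rev`):
    # the returned object keeps state across calls, which is what the
    # incremental discovery code relies on; a single-shot use looks like
    # findmissingrevs() below:
    #
    #     inc = rl.incrementalmissingrevs(common=[common_rev])
    #     missing = inc.missingancestors([head_rev])
    #     # `missing` is sorted by revision number, hence topologically.
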
    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

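    # Equivalence sketch: findmissing() is the node-level counterpart of
    # findmissingrevs(); for hypothetical nodes `c` and `h` the two agree:
    #
    #     revs = rl.findmissingrevs(common=[rl.rev(c)], heads=[rl.rev(h)])
    #     nodes = rl.findmissing(common=[c], heads=[h])
    #     assert nodes == [rl.node(r) for r in revs]
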
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents is a
                # descendant. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

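    # Shape of the result, as a hedged sketch (hypothetical `rl` and
    # nodes `r`, `h`):
    #
    #     nodes, outroots, outheads = rl.nodesbetween([r], [h])
    #     assert set(outroots) <= {r} and set(outheads) <= {h}
    #     assert all(n in nodes for n in outroots + outheads)
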
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

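    # Usage sketch (hypothetical `rl` and node `n`): with a start node and
    # no stop list, heads() answers with global heads that descend from it:
    #
    #     all_heads = rl.heads()
    #     sub_heads = rl.heads(start=n)
    #     assert set(sub_heads) <= set(all_heads)
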
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

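    # Why isancestorrev() can shortcut before touching reachableroots():
    # revision numbers are a topological order, so an ancestor always has
    # a smaller (or equal) rev. A sketch with hypothetical revs:
    #
    #     assert rl.isancestorrev(rev_a, rev_a)          # reflexive
    #     if rev_a > rev_b:
    #         assert not rl.isancestorrev(rev_a, rev_b)  # never backwards
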
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if id[-1] not in hexdigits:
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

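    # Lookup sketch (hypothetical `rl` and 40-char hex node `h`): lookup()
    # resolves several spellings of the same revision to one binary node:
    #
    #     n = rl.lookup(0)          # revision number
    #     n = rl.lookup(b'0')       # str(revision number), as bytes
    #     n = rl.lookup(bin(h))     # full binary nodeid
    #     n = rl.lookup(h[:8])      # unambiguous hex prefix
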
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

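    # Round-trip sketch: the shortest prefix must still resolve back to the
    # node it came from (hypothetical `rl` and node `n`):
    #
    #     prefix = rl.shortest(n, minlength=4)
    #     assert rl.lookup(prefix) == n
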
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

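    # Note the trick above: cmp() never reads the stored text. It re-hashes
    # the candidate against the stored parents, so a difference is detected
    # without decompressing anything (hypothetical `rl` and node `n`):
    #
    #     if rl.cmp(n, candidate_text):
    #         ...   # candidate_text differs from what revision `n` stores
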
2623 def deltaparent(self, rev):
2626 def deltaparent(self, rev):
2624 """return deltaparent of the given revision"""
2627 """return deltaparent of the given revision"""
2625 base = self.index[rev][3]
2628 base = self.index[rev][3]
2626 if base == rev:
2629 if base == rev:
2627 return nullrev
2630 return nullrev
2628 elif self.delta_config.general_delta:
2631 elif self.delta_config.general_delta:
2629 return base
2632 return base
2630 else:
2633 else:
2631 return rev - 1
2634 return rev - 1
2632
2635
2633 def issnapshot(self, rev):
2636 def issnapshot(self, rev):
2634 """tells whether rev is a snapshot"""
2637 """tells whether rev is a snapshot"""
2635 ret = self._inner.issnapshot(rev)
2638 ret = self._inner.issnapshot(rev)
2636 self.issnapshot = self._inner.issnapshot
2639 self.issnapshot = self._inner.issnapshot
2637 return ret
2640 return ret
2638
2641
2639 def snapshotdepth(self, rev):
2642 def snapshotdepth(self, rev):
2640 """number of snapshot in the chain before this one"""
2643 """number of snapshot in the chain before this one"""
2641 if not self.issnapshot(rev):
2644 if not self.issnapshot(rev):
2642 raise error.ProgrammingError(b'revision %d not a snapshot')
2645 raise error.ProgrammingError(b'revision %d not a snapshot')
2643 return len(self._inner._deltachain(rev)[0]) - 1
2646 return len(self._inner._deltachain(rev)[0]) - 1
2644
2647
2645 def revdiff(self, rev1, rev2):
2648 def revdiff(self, rev1, rev2):
2646 """return or calculate a delta between two revisions
2649 """return or calculate a delta between two revisions
2647
2650
2648 The delta calculated is in binary form and is intended to be written to
2651 The delta calculated is in binary form and is intended to be written to
2649 revlog data directly. So this function needs raw revision data.
2652 revlog data directly. So this function needs raw revision data.
2650 """
2653 """
2651 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2654 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2652 return bytes(self._inner._chunk(rev2))
2655 return bytes(self._inner._chunk(rev2))
2653
2656
2654 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2657 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2655
2658
2656 def revision(self, nodeorrev):
2659 def revision(self, nodeorrev):
2657 """return an uncompressed revision of a given node or revision
2660 """return an uncompressed revision of a given node or revision
2658 number.
2661 number.
2659 """
2662 """
2660 return self._revisiondata(nodeorrev)
2663 return self._revisiondata(nodeorrev)
2661
2664
2662 def sidedata(self, nodeorrev):
2665 def sidedata(self, nodeorrev):
2663 """a map of extra data related to the changeset but not part of the hash
2666 """a map of extra data related to the changeset but not part of the hash
2664
2667
2665 This function currently return a dictionary. However, more advanced
2668 This function currently return a dictionary. However, more advanced
2666 mapping object will likely be used in the future for a more
2669 mapping object will likely be used in the future for a more
2667 efficient/lazy code.
2670 efficient/lazy code.
2668 """
2671 """
2669 # deal with <nodeorrev> argument type
2672 # deal with <nodeorrev> argument type
2670 if isinstance(nodeorrev, int):
2673 if isinstance(nodeorrev, int):
2671 rev = nodeorrev
2674 rev = nodeorrev
2672 else:
2675 else:
2673 rev = self.rev(nodeorrev)
2676 rev = self.rev(nodeorrev)
2674 return self._sidedata(rev)
2677 return self._sidedata(rev)
2675
2678
2676 def _rawtext(self, node, rev):
2679 def _rawtext(self, node, rev):
2677 """return the possibly unvalidated rawtext for a revision
2680 """return the possibly unvalidated rawtext for a revision
2678
2681
2679 returns (rev, rawtext, validated)
2682 returns (rev, rawtext, validated)
2680 """
2683 """
2681 # Check if we have the entry in cache
2684 # Check if we have the entry in cache
2682 # The cache entry looks like (node, rev, rawtext)
2685 # The cache entry looks like (node, rev, rawtext)
2683 if self._inner._revisioncache:
2686 if self._inner._revisioncache:
2684 if self._inner._revisioncache[0] == node:
2687 if self._inner._revisioncache[0] == node:
2685 return (rev, self._inner._revisioncache[2], True)
2688 return (rev, self._inner._revisioncache[2], True)
2686
2689
2687 if rev is None:
2690 if rev is None:
2688 rev = self.rev(node)
2691 rev = self.rev(node)
2689
2692
2690 return self._inner.raw_text(node, rev)
2693 return self._inner.raw_text(node, rev)
2691
2694
2692 def _revisiondata(self, nodeorrev, raw=False):
2695 def _revisiondata(self, nodeorrev, raw=False):
2693 # deal with <nodeorrev> argument type
2696 # deal with <nodeorrev> argument type
2694 if isinstance(nodeorrev, int):
2697 if isinstance(nodeorrev, int):
2695 rev = nodeorrev
2698 rev = nodeorrev
2696 node = self.node(rev)
2699 node = self.node(rev)
2697 else:
2700 else:
2698 node = nodeorrev
2701 node = nodeorrev
2699 rev = None
2702 rev = None
2700
2703
2701 # fast path the special `nullid` rev
2704 # fast path the special `nullid` rev
2702 if node == self.nullid:
2705 if node == self.nullid:
2703 return b""
2706 return b""
2704
2707
2705 # ``rawtext`` is the text as stored inside the revlog. Might be the
2708 # ``rawtext`` is the text as stored inside the revlog. Might be the
2706 # revision or might need to be processed to retrieve the revision.
2709 # revision or might need to be processed to retrieve the revision.
2707 rev, rawtext, validated = self._rawtext(node, rev)
2710 rev, rawtext, validated = self._rawtext(node, rev)
2708
2711
2709 if raw and validated:
2712 if raw and validated:
2710 # if we don't want to process the raw text and that raw
2713 # if we don't want to process the raw text and that raw
2711 # text is cached, we can exit early.
2714 # text is cached, we can exit early.
2712 return rawtext
2715 return rawtext
2713 if rev is None:
2716 if rev is None:
2714 rev = self.rev(node)
2717 rev = self.rev(node)
2715 # the revlog's flag for this revision
2718 # the revlog's flag for this revision
2716 # (usually alter its state or content)
2719 # (usually alter its state or content)
2717 flags = self.flags(rev)
2720 flags = self.flags(rev)
2718
2721
2719 if validated and flags == REVIDX_DEFAULT_FLAGS:
2722 if validated and flags == REVIDX_DEFAULT_FLAGS:
2720 # no extra flags set, no flag processor runs, text = rawtext
2723 # no extra flags set, no flag processor runs, text = rawtext
2721 return rawtext
2724 return rawtext
2722
2725
2723 if raw:
2726 if raw:
2724 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2727 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2725 text = rawtext
2728 text = rawtext
2726 else:
2729 else:
2727 r = flagutil.processflagsread(self, rawtext, flags)
2730 r = flagutil.processflagsread(self, rawtext, flags)
2728 text, validatehash = r
2731 text, validatehash = r
2729 if validatehash:
2732 if validatehash:
2730 self.checkhash(text, node, rev=rev)
2733 self.checkhash(text, node, rev=rev)
2731 if not validated:
2734 if not validated:
2732 self._inner._revisioncache = (node, rev, rawtext)
2735 self._inner._revisioncache = (node, rev, rawtext)
2733
2736
2734 return text
2737 return text
2735
2738
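    # A minimal usage sketch (illustrative only, not part of this module):
    # assuming ``rl`` is an open revlog, both public entry points funnel into
    # ``_revisiondata``; ``revision`` returns the flag-processed text while
    # ``rawdata`` returns the text exactly as stored.
    #
    #     with rl.reading():
    #         text = rl.revision(node)  # processed, hash-checked text
    #         raw = rl.rawdata(node)    # raw stored text, flags unapplied
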
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        sidedata_end = None
        if self._docket is not None:
            sidedata_end = self._docket.sidedata_end
        return self._inner.sidedata(rev, sidedata_end)

    def rawdata(self, nodeorrev):
        """return the uncompressed raw data of a given node or revision number."""
        return self._revisiondata(nodeorrev, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

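    # A minimal sketch of what ``storageutil.hashrevisionsha1`` computes for
    # the default SHA-1 backend (illustrative only, assuming the usual
    # Mercurial convention): the node is the SHA-1 of the two parent nodes,
    # in sorted order, followed by the text.
    #
    #     import hashlib
    #
    #     def example_node_hash(text, p1, p2):
    #         a, b = sorted([p1, p2])
    #         return hashlib.sha1(a + b + text).digest()
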
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if (
                    self._inner._revisioncache
                    and self._inner._revisioncache[0] == node
                ):
                    self._inner._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self.feature_config.censorable and storageutil.iscensoredtext(
                text
            ):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    @property
    def _split_index_file(self):
        """the path at which to expect the index of an ongoing split operation

        The file will only exist if a splitting operation is in progress, but
        it is always expected at the same location."""
        parts = self.radix.split(b'/')
        if len(parts) > 1:
            # adds a '-s' prefix to the ``data/`` or ``meta/`` base
            head = parts[0] + b'-s'
            mids = parts[1:-1]
            tail = parts[-1] + b'.i'
            pieces = [head] + mids + [tail]
            return b'/'.join(pieces)
        else:
            # the revlog is stored at the root of the store (changelog or
            # manifest), no risk of collision.
            return self.radix + b'.i.s'

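    # Illustrative mapping, derived directly from the logic above:
    #
    #     b'data/some/file'  ->  b'data-s/some/file.i'
    #     b'00changelog'     ->  b'00changelog.i.s'
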
    def _enforceinlinesize(self, tr, side_write=True):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        if self._docket is not None:
            msg = b"inline revlog should not have a docket"
            raise error.ProgrammingError(msg)

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        if troffset:
            tr.addbackup(self._indexfile, for_offset=True)
        tr.add(self._datafile, 0)

        new_index_file_path = None
        if side_write:
            old_index_file_path = self._indexfile
            new_index_file_path = self._split_index_file
            opener = self.opener
            weak_self = weakref.ref(self)

            # the "split" index replaces the real index when the transaction
            # is finalized
            def finalize_callback(tr):
                opener.rename(
                    new_index_file_path,
                    old_index_file_path,
                    checkambig=True,
                )
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path
                    maybe_self._inner.index_file = maybe_self._indexfile

            def abort_callback(tr):
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path
                    maybe_self._inner.inline = True
                    maybe_self._inner.index_file = old_index_file_path

            tr.registertmp(new_index_file_path)
            if self.target[1] is not None:
                callback_id = b'000-revlog-split-%d-%s' % self.target
            else:
                callback_id = b'000-revlog-split-%d' % self.target[0]
            tr.addfinalize(callback_id, finalize_callback)
            tr.addabort(callback_id, abort_callback)

        self._format_flags &= ~FLAG_INLINE_DATA
        self._inner.split_inline(
            tr,
            self._format_flags | self._format_version,
            new_index_file_path=new_index_file_path,
        )

        self._inline = False
        if new_index_file_path is not None:
            self._indexfile = new_index_file_path

        nodemaputil.setup_persistent_nodemap(tr, self)

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        with self._inner.reading():
            yield

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._inner.is_writing:
            yield
        else:
            data_end = None
            sidedata_end = None
            if self._docket is not None:
                data_end = self._docket.data_end
                sidedata_end = self._docket.sidedata_end
            with self._inner.writing(
                transaction,
                data_end=data_end,
                sidedata_end=sidedata_end,
            ):
                yield
                if self._docket is not None:
                    self._write_docket(transaction)

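    # Sketch of the intended usage (illustrative only): mutating entry points
    # wrap their work in ``_writing`` so that file handles are opened once
    # and the docket, when present, is flushed a single time at the end.
    #
    #     with self._writing(transaction):
    #         ...  # append one or more revisions
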
    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement its own
        transaction logic.

        We could also imagine using the same transaction logic for all
        revlogs, since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses
            might use a different hashing method (and override checkhash() in
            that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.feature_config.has_side_data:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

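    # A minimal calling sketch (illustrative; ``rl``, ``tr`` and ``linkrev``
    # are hypothetical names for an open revlog, an open transaction and the
    # changelog revision this entry is tied to):
    #
    #     rev = rl.addrevision(text, tr, linkrev, p1node, p2node)
    #
    # The returned value is the new (or preexisting) revision number.
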
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents

        useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        return self._inner.compress(data)

    def decompress(self, data):
        return self._inner.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._inner._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._inner._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add
                # on the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self.delta_config.debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        if cachedelta is not None and len(cachedelta) == 2:
            # If the cached delta has no information about how it should be
            # reused, add the default reuse instruction according to the
            # revlog's configuration.
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse = DELTA_BASE_REUSE_NO
            cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.feature_config.has_side_data:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self._inner.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no
            # different from ones we manually add.
            sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self.feature_config.compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

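        # Worked example (illustrative): treating ``rank(r)`` as the size of
        # ``ancestors(r)`` including ``r`` itself, a merge's rank is
        # ``1 + rank(pmax)`` plus every ancestor of ``pmin`` not already
        # counted among ``pmax``'s ancestors, which is exactly the set that
        # ``findmissingrevs([pmax], [pmin])`` enumerates above.
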
        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._inner._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.

        Versions < 2 of the revlog can get this in O(1); revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self,
        transaction,
        entry,
        data,
        link,
        offset,
        sidedata,
        sidedata_offset,
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        index_end = data_end = sidedata_end = None
        if self._docket is not None:
            index_end = self._docket.index_end
            data_end = self._docket.data_end
            sidedata_end = self._docket.sidedata_end

        files_end = self._inner.write_entry(
            transaction,
            entry,
            data,
            link,
            offset,
            sidedata,
            sidedata_offset,
            index_end,
            data_end,
            sidedata_end,
        )
        self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = files_end[0]
            self._docket.data_end = files_end[1]
            self._docket.sidedata_end = files_end[2]

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the revision that was added.
        """
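        # Shape of each ``deltas`` item, mirroring the unpacking in the loop
        # below (illustrative reminder only):
        #
        #     (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
        #
        # ``delta`` is a binary patch against ``deltabase``, and
        # ``linkmapper`` turns ``linknode`` into a local linkrev.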

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self.delta_config.debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement
                        # in a single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self.feature_config.censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self.feature_config.censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

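    # Typical driver sketch (illustrative only; ``rl``, ``tr`` and
    # ``minlink`` are hypothetical names): callers compute the strip point
    # first, save the revisions that must survive, then truncate.
    #
    #     rev, broken = rl.getstrippoint(minlink)
    #     # ... back up revisions >= rev that should be re-added ...
    #     rl.strip(minlink, tr)
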
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment.
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
-       self._inner._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
-       self._inner._segmentfile.clear_cache()
-       self._inner._segmentfile_sidedata.clear_cache()
+       self._inner.clear_cache()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

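    # Quick integrity sketch (illustrative only; ``rl`` is a hypothetical
    # open revlog): a healthy revlog reports no trailing bytes in either
    # file.
    #
    #     dd, di = rl.checksize()
    #     assert (dd, di) == (0, 0)
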
    def files(self):
        """return list of files that compose this revlog"""
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self.delta_config.general_delta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self._candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

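    # Illustrative use of these policies (hypothetical ``src``, ``dst`` and
    # ``tr`` names): the balanced default reuses a delta only when the
    # destination would pick the same revisions, while ``DELTAREUSENEVER``
    # forces a full recomputation.
    #
    #     src.clone(tr, dst, deltareuse=revlog.DELTAREUSESAMEREVS)
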
    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta
        encoding differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means if
        you are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether deltas are forcibly computed against both
        parents for merges. By default, the destination revlog's existing
        setting is used.

        See `revlogutils.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazy_delta and lazy_delta_base control whether to reuse a cached
        # delta, if possible.
        old_delta_config = destrevlog.delta_config
        destrevlog.delta_config = destrevlog.delta_config.copy()

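        # How each reuse policy maps onto the destination's delta knobs
        # (DELTAREUSEFULLADD leaves the configuration untouched and re-adds
        # revisions through the regular addrevision() path instead):
        #   DELTAREUSEALWAYS   -> lazy_delta=True,  lazy_delta_base=True
        #   DELTAREUSESAMEREVS -> lazy_delta=True,  lazy_delta_base=False
        #   DELTAREUSENEVER    -> lazy_delta=False, lazy_delta_base=False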
        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog.delta_config.lazy_delta_base = True
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = False

            delta_both_parents = (
                forcedeltabothparents or old_delta_config.delta_both_parents
            )
            destrevlog.delta_config.delta_both_parents = delta_both_parents

            with self.reading(), destrevlog._writing(tr):
                self._clone(
                    tr,
                    destrevlog,
                    addrevisioncb,
                    deltareuse,
                    forcedeltabothparents,
                    sidedata_helpers,
                )

        finally:
            destrevlog.delta_config = old_delta_config

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self.delta_config.debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
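            # The index entry packs the data offset into the high bits of
            # entry[0] and the 16-bit storage flags into the low bits;
            # entry[5] and entry[6] are the parent revisions and entry[7]
            # is the node id.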
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
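                    # new_flags is a (flags to add, flags to remove) pair;
                    # note that ``&`` binds tighter than ``|`` below.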
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog.delta_config.lazy_delta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._inner._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    try:
                        rawtext = self._revisiondata(rev)
                    except error.CensoredNodeError as censored:
                        assert flags & REVIDX_ISCENSORED
                        rawtext = censored.tombstone
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

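    # Example (hypothetical usage sketch): replacing the data of a leaked
    # revision in a filelog with a tombstone, inside an open transaction
    # ``tr``:
    #
    #   fl = repo.file(b'path/to/secret')
    #   fl._revlog.censorrevision(tr, bad_node, tombstone=b'censored')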
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            # common: the most common case
            # rename: with a rename
            # meta: file content starts with b'\1\n', the metadata
            #       header defined in filelog.py, but without a rename
            # ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
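            #
            # For reference, a rawtext carrying rename metadata starts with:
            #   b'\x01\ncopy: path/of/source\ncopyrev: <40 hex digits>\n\x01\n'
            # followed by the real file content; "LM" below is the length of
            # that header.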
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            # 1. length check: L1 == L2, in all cases.
            # 2. hash check: depending on flag processor, we may need to
            #    use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

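    # Example (hypothetical usage sketch): collecting the problems reported
    # for a revlog ``rl`` during verification:
    #
    #   state = {b'expectedversion': REVLOGV1, b'erroroncensored': True}
    #   for problem in rl.verifyintegrity(state):
    #       ui.warn((problem.error or problem.warning) + b'\n')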
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

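    # Illustrative example (values invented): calling
    # storageinfo(revisionscount=True, storedsize=True) returns a dict
    # shaped like {b'revisionscount': 42, b'storedsize': 16384}.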
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.feature_config.has_side_data:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._inner._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

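                # Decide how the serialized sidedata is stored: PLAIN keeps
                # the bytes uncompressed, DEFAULT means they were compressed
                # with the docket's default engine (whose header is implied),
                # and INLINE keeps an explicit compression header inline with
                # the data.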
                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.feature_config.has_side_data:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self._inner.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)