revlog: move the `rawtext` method on the inner object...
marmoute
r51990:be56d322 default
@@ -1,3974 +1,3985 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import io
import os
import struct
import weakref
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanket usage of all the names to prevent pyflakes warnings
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072


# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if hasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'


class _Config:
    def copy(self):
        return self.__class__(**self.__dict__)


@attr.s()
class FeatureConfig(_Config):
    """Hold configuration values about the available revlog features"""

    # the default compression engine
    compression_engine = attr.ib(default=b'zlib')
    # compression engines options
    compression_engine_options = attr.ib(default=attr.Factory(dict))

    # can we use censor on this revlog
    censorable = attr.ib(default=False)
    # does this revlog use the "side data" feature
    has_side_data = attr.ib(default=False)
    # might remove rank configuration once the computation has no impact
    compute_rank = attr.ib(default=False)
    # parent order is supposed to be semantically irrelevant, so we
    # normally resort parents to ensure that the first parent is non-null,
    # if there is a non-null parent at all.
    # filelog abuses the parent order as flag to mark some instances of
    # meta-encoded files, so allow it to disable this behavior.
    canonical_parent_order = attr.ib(default=False)
    # can ellipsis commit be used
    enable_ellipsis = attr.ib(default=False)

    def copy(self):
        new = super().copy()
        new.compression_engine_options = self.compression_engine_options.copy()
        return new


@attr.s()
class DataConfig(_Config):
    """Hold configuration values about how the revlog data are read"""

    # should we try to open the "pending" version of the revlog
    try_pending = attr.ib(default=False)
    # should we try to open the "split" version of the revlog
    try_split = attr.ib(default=False)
    # When True, indexfile should be opened with checkambig=True at writing,
    # to avoid file stat ambiguity.
    check_ambig = attr.ib(default=False)

    # If true, use mmap instead of reading to deal with large index
    mmap_large_index = attr.ib(default=False)
    # how much data is large
    mmap_index_threshold = attr.ib(default=None)
    # How much data to read and cache into the raw revlog data cache.
    chunk_cache_size = attr.ib(default=65536)

    # Allow sparse reading of the revlog data
    with_sparse_read = attr.ib(default=False)
    # minimal density of a sparse read chunk
    sr_density_threshold = attr.ib(default=0.50)
    # minimal size of data we skip when performing sparse read
    sr_min_gap_size = attr.ib(default=262144)

    # are deltas encoded against arbitrary bases.
    generaldelta = attr.ib(default=False)


@attr.s()
class DeltaConfig(_Config):
    """Hold configuration values about how new deltas are computed

    Some attributes are duplicated from DataConfig to help having each object
    self-contained.
    """

    # can delta be encoded against arbitrary bases.
    general_delta = attr.ib(default=False)
    # Allow sparse writing of the revlog data
    sparse_revlog = attr.ib(default=False)
    # maximum length of a delta chain
    max_chain_len = attr.ib(default=None)
    # Maximum distance between delta chain base start and end
    max_deltachain_span = attr.ib(default=-1)
    # If `upper_bound_comp` is not None, this is the expected maximal gain from
    # compression for the data content.
    upper_bound_comp = attr.ib(default=None)
    # Should we try a delta against both parents
    delta_both_parents = attr.ib(default=True)
    # Test delta base candidate group by chunk of this maximal size.
    candidate_group_chunk_size = attr.ib(default=0)
    # Should we display debug information about delta computation
    debug_delta = attr.ib(default=False)
    # trust incoming delta by default
    lazy_delta = attr.ib(default=True)
    # trust the base of incoming delta by default
    lazy_delta_base = attr.ib(default=False)


class _InnerRevlog:
    """An inner layer of the revlog object

    That layer exists to be able to delegate some operations to Rust; its
    boundaries are arbitrary and based on what we can delegate to Rust.
    """

    def __init__(
        self,
        opener,
        index,
        index_file,
        data_file,
        sidedata_file,
        inline,
        data_config,
        delta_config,
        feature_config,
        chunk_cache,
        default_compression_header,
    ):
        self.opener = opener
        self.index = index

        self.__index_file = index_file
        self.data_file = data_file
        self.sidedata_file = sidedata_file
        self.inline = inline
        self.data_config = data_config
        self.delta_config = delta_config
        self.feature_config = feature_config

        self._default_compression_header = default_compression_header

        # index

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self.index_file if self.inline else self.data_file),
            self.data_config.chunk_cache_size,
            chunk_cache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self.sidedata_file,
            self.data_config.chunk_cache_size,
        )

        # revlog header -> revlog compressor
        self._decompressors = {}
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None

    @property
    def index_file(self):
        return self.__index_file

    @index_file.setter
    def index_file(self, new_index_file):
        self.__index_file = new_index_file
        if self.inline:
            self._segmentfile.filename = new_index_file

    def __len__(self):
        return len(self.index)

    # Derived from index values.

    def start(self, rev):
        """the offset of the data chunk for this revision"""
        return int(self.index[rev][0] >> 16)

    def length(self, rev):
        """the length of the data chunk for this revision"""
        return self.index[rev][1]

    def end(self, rev):
        """the end of the data chunk for this revision"""
        return self.start(rev) + self.length(rev)

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self.delta_config.general_delta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self.delta_config.sparse_revlog:
            return self.deltaparent(rev) == nullrev
        elif hasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        while self.length(p1) == 0:
            b = self.deltaparent(p1)
            if b == p1:
                break
            p1 = b
        p2 = entry[6]
        while self.length(p2) == 0:
            b = self.deltaparent(p2)
            if b == p2:
                break
            p2 = b
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)
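
    # Illustrative note (not part of the upstream change): for a non-sparse
    # revlog only full-text revisions are snapshots, so ``issnapshot(rev)``
    # reduces to ``deltaparent(rev) == nullrev``. With sparse-revlog, a
    # revision deltified against another snapshot (rather than against one
    # of its parents) is an intermediate snapshot and also returns True.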

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        generaldelta = self.delta_config.general_delta
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
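
    # Illustrative sketch (hypothetical revision numbers, not upstream
    # code): with general delta the chain follows each entry's recorded
    # base; without it, the base is implicitly ``rev - 1``.
    #
    #   chain, stopped = inner._deltachain(10, stoprev=None)
    #   # e.g. chain == [2, 5, 10], in ascending order; stopped is False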

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self.feature_config.compression_engine]
        return engine.revlogcompressor(
            self.feature_config.compression_engine_options
        )

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._default_compression_header is None:
            return None
        t = self._default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(
                    self.feature_config.compression_engine_options
                )
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data
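
    # Illustrative sketch (hypothetical data, not upstream code): the
    # returned prefix is b'' when the compressor embedded its own header
    # (or the data starts with '\0'), and b'u' for uncompressed data.
    #
    #   header, packed = inner.compress(b'some revision text')
    #   assert header in (b'', b'u')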

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)
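
    # Illustrative sketch (not upstream code): ``decompress`` dispatches on
    # the first byte, so chunks produced by ``compress`` round-trip:
    #
    #   header, packed = inner.compress(b'some revision text')
    #   assert bytes(inner.decompress(header + packed)) == b'some revision text'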

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        if len(self.index) == 0:
            yield  # nothing to be read
        else:
            with self._segmentfile.reading():
                with self._segmentfile_sidedata.reading():
                    yield

    @property
    def is_writing(self):
        """True if a writing context is open"""
        return self._writinghandles is not None

    @contextlib.contextmanager
    def writing(self, transaction, data_end=None, sidedata_end=None):
        """Open the revlog files for writing

        Adding content to a revlog should be done within such a context.
        """
        if self.is_writing:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self.index)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self.inline:
                    try:
                        dfh = self.opener(self.data_file, mode=b"r+")
                        if data_end is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self.opener(self.data_file, mode=b"w+")
                    transaction.add(self.data_file, dsize)
                if self.sidedata_file is not None:
                    assert sidedata_end is not None
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self.sidedata_file, mode=b"r+")
                        dfh.seek(sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self.sidedata_file, mode=b"w+")
                    transaction.add(self.sidedata_file, sidedata_end)

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self.inline:
                    transaction.add(self.index_file, dsize + isize)
                else:
                    transaction.add(self.index_file, isize)
                # exposing all file handle for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self.inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing referent to
                # potential unflushed data content.
                if ifh is not None:
                    ifh.close()

    def __index_write_fp(self, index_end=None):
        """internal method to open the index file for writing

        You should not use this directly; use `_writing` instead.
        """
        try:
            f = self.opener(
                self.index_file,
                mode=b"r+",
                checkambig=self.data_config.check_ambig,
            )
            if index_end is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(index_end, os.SEEK_SET)
            return f
        except FileNotFoundError:
            return self.opener(
                self.index_file,
                mode=b"w+",
                checkambig=self.data_config.check_ambig,
            )

    def __index_new_fp(self):
        """internal method to create a new index file for writing

        You should not use this unless you are upgrading from inline revlog
        """
        return self.opener(
            self.index_file,
            mode=b"w",
            checkambig=self.data_config.check_ambig,
            atomictemp=True,
        )

    def split_inline(self, tr, header, new_index_file_path=None):
        """split the data of an inline revlog into an index and a data file"""
        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with sidedata writing handle as it is only
            # relevant with revlog-v2 which is never inline, not reaching
            # this code

        new_dfh = self.opener(self.data_file, mode=b"w+")
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self.reading():
                for r in range(len(self.index)):
                    new_dfh.write(self.get_segment_for_revs(r, r)[1])
                new_dfh.flush()

            if new_index_file_path is not None:
                self.index_file = new_index_file_path
            with self.__index_new_fp() as fp:
                self.inline = False
                for i in range(len(self.index)):
                    e = self.index.entry_binary(i)
                    if i == 0:
                        packed_header = self.index.pack_header(header)
                        e = packed_header + e
                    fp.write(e)

                # If we don't use side-write, the temp file replaces the real
                # index when we exit the context manager

            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self.data_file,
                self.data_config.chunk_cache_size,
            )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with sidedata writing handle as it is only
                # relevant with revlog-v2 which is never inline, not reaching
                # this code
        finally:
            if new_dfh is not None:
                new_dfh.close()
        return self.index_file

    def get_segment_for_revs(self, startrev, endrev):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.

        API: we should consider making this a private part of the InnerRevlog
        at some point.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self.inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length)
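
    # Illustrative sketch (hypothetical revisions, non-inline revlog, not
    # upstream code): fetch the raw bytes covering revisions 0..3, then
    # slice out a single revision:
    #
    #   offset, data = inner.get_segment_for_revs(0, 3)
    #   chunk = data[inner.start(2) - offset : inner.end(2) - offset]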

    def _chunk(self, rev):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self.get_segment_for_revs(rev, rev)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)
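
    # Note on the dispatch above: the per-revision compression mode stored
    # in the index entry (``entry[10]``) selects the decoder.
    # COMP_MODE_PLAIN returns the bytes unchanged, COMP_MODE_DEFAULT uses
    # the revlog-wide default decompressor, and COMP_MODE_INLINE reads the
    # engine header embedded in the chunk itself.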

    def _chunks(self, revs, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self.inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self.data_config.with_sparse_read:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self,
                revs,
                targetsize=targetsize,
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self.get_segment_for_revs(firstrev, lastrev)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

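    # Illustrative sketch (hypothetical revisions, not upstream code):
    # decompress several chunks in one pass; results are ordered like the
    # input revisions, which must be ascending.
    #
    #   bins = inner._chunks([2, 5, 10], targetsize=4 * 1024 * 1024)
    #   assert len(bins) == 3
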
    def raw_text(self, node, rev):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            cachedrev = self._revisioncache[1]

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._inner._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)
903
941
class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

    _flagserrorclass = error.RevlogError

    @staticmethod
    def is_inline_index(header_bytes):
        """Determine if a revlog is inline from the initial bytes of the index"""
        header = INDEX_HEADER.unpack(header_bytes)[0]

        _format_flags = header & ~0xFFFF
        _format_version = header & 0xFFFF

        features = FEATURES_BY_VERSION[_format_version]
        return features[b'inline'](_format_flags)

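    # Illustrative sketch (not part of this module's API; the path is
    # hypothetical): peeking at an index header to decide whether a revlog
    # stores its data chunks inline with its index entries.
    #
    #   with opener(b'data/somefile.i') as fp:
    #       header_bytes = fp.read(INDEX_HEADER.size)
    #   if revlog.is_inline_index(header_bytes):
    #       ...  # data lives interleaved in the .i file, no separate .d
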
    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        try_split=False,
        canonical_parent_order=True,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note: this must be reliably set by normal code,
        but test, debug, or performance measurement code might not set it
        to an accurate value.
        """

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self._try_split = try_split
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        if b'feature-config' in self.opener.options:
            self.feature_config = self.opener.options[b'feature-config'].copy()
        else:
            self.feature_config = FeatureConfig()
        self.feature_config.censorable = censorable
        self.feature_config.canonical_parent_order = canonical_parent_order
        if b'data-config' in self.opener.options:
            self.data_config = self.opener.options[b'data-config'].copy()
        else:
            self.data_config = DataConfig()
        self.data_config.check_ambig = checkambig
        self.data_config.mmap_large_index = mmaplargeindex
        if b'delta-config' in self.opener.options:
            self.delta_config = self.opener.options[b'delta-config'].copy()
        else:
            self.delta_config = DeltaConfig()
        self.delta_config.upper_bound_comp = upperboundcomp

        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)

        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}

        # other optional features

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)
        # prevent nesting of addgroup
        self._adding_group = None

        chunk_cache = self._loadindex()
        self._load_inner(chunk_cache)
        self._concurrencychecker = concurrencychecker

    @property
    def _generaldelta(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
        )
        return self.delta_config.general_delta

    @property
    def _checkambig(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
        )
        return self.data_config.check_ambig

    @property
    def _mmaplargeindex(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
        )
        return self.data_config.mmap_large_index

    @property
    def _censorable(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
        )
        return self.feature_config.censorable

    @property
    def _chunkcachesize(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
        )
        return self.data_config.chunk_cache_size

    @property
    def _maxchainlen(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
        )
        return self.delta_config.max_chain_len

    @property
    def _deltabothparents(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
        )
        return self.delta_config.delta_both_parents

    @property
    def _candidate_group_chunk_size(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.candidate_group_chunk_size",
            b"6.6",
            stacklevel=2,
        )
        return self.delta_config.candidate_group_chunk_size

    @property
    def _debug_delta(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
        )
        return self.delta_config.debug_delta

    @property
    def _compengine(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.compression_engine",
            b"6.6",
            stacklevel=2,
        )
        return self.feature_config.compression_engine

    @property
    def upperboundcomp(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.upper_bound_comp",
            b"6.6",
            stacklevel=2,
        )
        return self.delta_config.upper_bound_comp

    @property
    def _compengineopts(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.compression_engine_options",
            b"6.6",
            stacklevel=2,
        )
        return self.feature_config.compression_engine_options

    @property
    def _maxdeltachainspan(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
        )
        return self.delta_config.max_deltachain_span

    @property
    def _withsparseread(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
        )
        return self.data_config.with_sparse_read

    @property
    def _sparserevlog(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
        )
        return self.delta_config.sparse_revlog

    @property
    def hassidedata(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
        )
        return self.feature_config.has_side_data

    @property
    def _srdensitythreshold(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.sr_density_threshold",
            b"6.6",
            stacklevel=2,
        )
        return self.data_config.sr_density_threshold

    @property
    def _srmingapsize(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
        )
        return self.data_config.sr_min_gap_size

    @property
    def _compute_rank(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
        )
        return self.feature_config.compute_rank

    @property
    def canonical_parent_order(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.canonical_parent_order",
            b"6.6",
            stacklevel=2,
        )
        return self.feature_config.canonical_parent_order

    @property
    def _lazydelta(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
        )
        return self.delta_config.lazy_delta

    @property
    def _lazydeltabase(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
        )
        return self.delta_config.lazy_delta_base

    def _init_opts(self):
        """process options (from above/config) to set up the associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
            compute_rank = opts.get(b'changelogv2.compute-rank', True)
            self.feature_config.compute_rank = compute_rank
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        mmapindexthreshold = None
        if self.data_config.mmap_large_index:
            mmapindexthreshold = self.data_config.mmap_index_threshold
        if self.feature_config.enable_ellipsis:
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        chunk_cache_size = self.data_config.chunk_cache_size
        if chunk_cache_size <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % chunk_cache_size
            )
        elif chunk_cache_size & (chunk_cache_size - 1):
            # a power of two has exactly one bit set, so x & (x - 1) is zero
            # precisely when x is a power of two
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % chunk_cache_size
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing, return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''

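    # Illustrative sketch (hypothetical filename and threshold): a file below
    # the threshold is read() into bytes, a larger one comes back as a
    # util.buffer over an mmap, and a missing file yields b'':
    #
    #   data = self._get_data(b'data/somefile.i', mmap_threshold=1 << 20)
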
    def get_streams(self, max_linkrev, force_inline=False):
        """return a list of streams that represent this revlog

        This is used by stream-clone to do byte-for-byte copies of a repository.

        This streams data for all revisions that refer to a changelog revision up
        to `max_linkrev`.

        If `force_inline` is set, it enforces that the stream will represent an inline revlog.

        It returns a list of three-tuples:

            [
                (filename, bytes_stream, stream_size),
                …
            ]
        """
        n = len(self)
        index = self.index
        while n > 0:
            linkrev = index[n - 1][4]
            if linkrev < max_linkrev:
                break
            # note: this loop will rarely go through multiple iterations, since
            # it only traverses commits created during the current streaming
            # pull operation.
            #
            # If this becomes a problem, using a binary search should cap the
            # runtime of this.
            n = n - 1
        if n == 0:
            # no data to send
            return []
        index_size = n * index.entry_size
        data_size = self.end(n - 1)

        # XXX we might have been split (or stripped) since the object
        # initialization. We need to close this race too, e.g. by pre-opening
        # the files we feed to the revlog and never closing them before we
        # are done streaming.

        if self._inline:

            def get_stream():
                with self.opener(self._indexfile, mode=b"r") as fp:
                    yield None
                    size = index_size + data_size
                    if size <= 65536:
                        yield fp.read(size)
                    else:
                        yield from util.filechunkiter(fp, limit=size)

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        elif force_inline:

            def get_stream():
                with self.reading():
                    yield None

                    for rev in range(n):
                        idx = self.index.entry_binary(rev)
                        if rev == 0 and self._docket is None:
                            # re-inject the inline flag
                            header = self._format_flags
                            header |= self._format_version
                            header |= FLAG_INLINE_DATA
                            header = self.index.pack_header(header)
                            idx = header + idx
                        yield idx
                        yield self._inner.get_segment_for_revs(rev, rev)[1]

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        else:

            def get_index_stream():
                with self.opener(self._indexfile, mode=b"r") as fp:
                    yield None
                    if index_size <= 65536:
                        yield fp.read(index_size)
                    else:
                        yield from util.filechunkiter(fp, limit=index_size)

            def get_data_stream():
                with self._datafp() as fp:
                    yield None
                    if data_size <= 65536:
                        yield fp.read(data_size)
                    else:
                        yield from util.filechunkiter(fp, limit=data_size)

            index_stream = get_index_stream()
            next(index_stream)
            data_stream = get_data_stream()
            next(data_stream)
            return [
                (self._datafile, data_stream, data_size),
                (self._indexfile, index_stream, index_size),
            ]

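    # Illustrative sketch (hypothetical consumer; the initial None has
    # already been consumed by next(), so iterating yields data chunks):
    #
    #   for name, stream, stream_size in rl.get_streams(max_linkrev):
    #       with dest_opener(name, b'wb') as fp:
    #           for chunk in stream:
    #               fp.write(chunk)
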
    def _loadindex(self, docket=None):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        elif self._try_split and self.opener.exists(self._split_index_file):
            entry_point = self._split_index_file
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self.delta_config.general_delta = features[b'generaldelta'](
                self._format_flags
            )
            self.feature_config.has_side_data = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self.delta_config.general_delta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self.delta_config.general_delta:
            self.delta_config.sparse_revlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and hasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)

        return chunkcache

1575 if self._docket is None:
1613 if self._docket is None:
1576 default_compression_header = None
1614 default_compression_header = None
1577 else:
1615 else:
1578 default_compression_header = self._docket.default_compression_header
1616 default_compression_header = self._docket.default_compression_header
1579
1617
1580 self._inner = _InnerRevlog(
1618 self._inner = _InnerRevlog(
1581 opener=self.opener,
1619 opener=self.opener,
1582 index=self.index,
1620 index=self.index,
1583 index_file=self._indexfile,
1621 index_file=self._indexfile,
1584 data_file=self._datafile,
1622 data_file=self._datafile,
1585 sidedata_file=self._sidedatafile,
1623 sidedata_file=self._sidedatafile,
1586 inline=self._inline,
1624 inline=self._inline,
1587 data_config=self.data_config,
1625 data_config=self.data_config,
1588 delta_config=self.delta_config,
1626 delta_config=self.delta_config,
1589 feature_config=self.feature_config,
1627 feature_config=self.feature_config,
1590 chunk_cache=chunk_cache,
1628 chunk_cache=chunk_cache,
1591 default_compression_header=default_compression_header,
1629 default_compression_header=default_compression_header,
1592 )
1630 )
1593
1631
1594 def get_revlog(self):
1632 def get_revlog(self):
1595 """simple function to mirror API of other not-really-revlog API"""
1633 """simple function to mirror API of other not-really-revlog API"""
1596 return self
1634 return self
1597
1635
    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        if self.revlog_kind == KIND_FILELOG:
            # Reference the file without the "data/" prefix, so it is familiar
            # to the user.
            return self.target[1]
        else:
            return self.radix

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def _candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        """update on-disk cache

        If a transaction is passed, the update may be delayed to transaction
        commit."""
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        """Clear in-memory caches"""
        self._inner._revisioncache = None
        self._chainbasecache.clear()
        self._inner._segmentfile.clear_cache()
        self._inner._segmentfile_sidedata.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket,
        # so we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and hasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        """return the revision number associated with a <nodeid>"""
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

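    # Illustrative sketch: rev() and node() are inverses for stored
    # revisions, so a node/rev round-trip is the identity (`rl` and
    # `some_node` are hypothetical):
    #
    #   assert rl.node(rl.rev(some_node)) == some_node
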
    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
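    # For orientation, an illustrative sketch of the index-entry fields the
    # accessors below rely on (see revlogutils/constants.py for the
    # authoritative layout):
    #
    #   e = self.index[rev]
    #   offset_and_flags = e[0]  # data offset << 16 | flags
    #   compressed_len = e[1]    # on-disk chunk length
    #   raw_len = e[2]           # uncompressed text length (may be negative)
    #   delta_base = e[3]        # rev this entry's delta is based on
    #   linkrev = e[4]           # pointer into the changelog
    #   p1, p2 = e[5], e[6]      # parent revisions
    #   node = e[7]              # nodeid (hash)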
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset (instead of previous-offset +
        # previous-size).
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.feature_config.has_side_data:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        if self.feature_config.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if self.feature_config.canonical_parent_order and d[5] == self.nullid:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

1831 def chainlen(self, rev):
1869 def chainlen(self, rev):
1832 return self._chaininfo(rev)[0]
1870 return self._chaininfo(rev)[0]
1833
1871
1834 def _chaininfo(self, rev):
1872 def _chaininfo(self, rev):
1835 chaininfocache = self._chaininfocache
1873 chaininfocache = self._chaininfocache
1836 if rev in chaininfocache:
1874 if rev in chaininfocache:
1837 return chaininfocache[rev]
1875 return chaininfocache[rev]
1838 index = self.index
1876 index = self.index
1839 generaldelta = self.delta_config.general_delta
1877 generaldelta = self.delta_config.general_delta
1840 iterrev = rev
1878 iterrev = rev
1841 e = index[iterrev]
1879 e = index[iterrev]
1842 clen = 0
1880 clen = 0
1843 compresseddeltalen = 0
1881 compresseddeltalen = 0
1844 while iterrev != e[3]:
1882 while iterrev != e[3]:
1845 clen += 1
1883 clen += 1
1846 compresseddeltalen += e[1]
1884 compresseddeltalen += e[1]
1847 if generaldelta:
1885 if generaldelta:
1848 iterrev = e[3]
1886 iterrev = e[3]
1849 else:
1887 else:
1850 iterrev -= 1
1888 iterrev -= 1
1851 if iterrev in chaininfocache:
1889 if iterrev in chaininfocache:
1852 t = chaininfocache[iterrev]
1890 t = chaininfocache[iterrev]
1853 clen += t[0]
1891 clen += t[0]
1854 compresseddeltalen += t[1]
1892 compresseddeltalen += t[1]
1855 break
1893 break
1856 e = index[iterrev]
1894 e = index[iterrev]
1857 else:
1895 else:
1858 # Add text length of base since decompressing that also takes
1896 # Add text length of base since decompressing that also takes
1859 # work. For cache hits the length is already included.
1897 # work. For cache hits the length is already included.
1860 compresseddeltalen += e[1]
1898 compresseddeltalen += e[1]
1861 r = (clen, compresseddeltalen)
1899 r = (clen, compresseddeltalen)
1862 chaininfocache[rev] = r
1900 chaininfocache[rev] = r
1863 return r
1901 return r
1864
1902
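# ``_chaininfo`` packs two costs into one tuple; a hedged sketch, assuming
# ``rl`` is an open revlog and ``rev`` a valid revision:
#
#     clen, csize = rl._chaininfo(rev)
#     # clen:  number of deltas to apply on top of the chain's base text
#     # csize: compressed bytes that must be read to rebuild the revision
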
1865 def _deltachain(self, rev, stoprev=None):
1903 def _deltachain(self, rev, stoprev=None):
1866 return self._inner._deltachain(rev, stoprev=stoprev)
1904 return self._inner._deltachain(rev, stoprev=stoprev)
1867
1905
1868 def ancestors(self, revs, stoprev=0, inclusive=False):
1906 def ancestors(self, revs, stoprev=0, inclusive=False):
1869 """Generate the ancestors of 'revs' in reverse revision order.
1907 """Generate the ancestors of 'revs' in reverse revision order.
1870 Does not generate revs lower than stoprev.
1908 Does not generate revs lower than stoprev.
1871
1909
1872 See the documentation for ancestor.lazyancestors for more details."""
1910 See the documentation for ancestor.lazyancestors for more details."""
1873
1911
1874 # first, make sure start revisions aren't filtered
1912 # first, make sure start revisions aren't filtered
1875 revs = list(revs)
1913 revs = list(revs)
1876 checkrev = self.node
1914 checkrev = self.node
1877 for r in revs:
1915 for r in revs:
1878 checkrev(r)
1916 checkrev(r)
1879 # and we're sure ancestors aren't filtered as well
1917 # and we're sure ancestors aren't filtered as well
1880
1918
1881 if rustancestor is not None and self.index.rust_ext_compat:
1919 if rustancestor is not None and self.index.rust_ext_compat:
1882 lazyancestors = rustancestor.LazyAncestors
1920 lazyancestors = rustancestor.LazyAncestors
1883 arg = self.index
1921 arg = self.index
1884 else:
1922 else:
1885 lazyancestors = ancestor.lazyancestors
1923 lazyancestors = ancestor.lazyancestors
1886 arg = self._uncheckedparentrevs
1924 arg = self._uncheckedparentrevs
1887 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1925 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1888
1926
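# A minimal usage sketch for ``ancestors`` (``rl`` and the revision numbers
# are illustrative, not part of this module):
#
#     for r in rl.ancestors([7, 5], stoprev=2, inclusive=True):
#         ...  # 7 and 5 themselves, then their ancestors, in reverse
#              # revision order, never going below revision 2
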
1889 def descendants(self, revs):
1927 def descendants(self, revs):
1890 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1928 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1891
1929
1892 def findcommonmissing(self, common=None, heads=None):
1930 def findcommonmissing(self, common=None, heads=None):
1893 """Return a tuple of the ancestors of common and the ancestors of heads
1931 """Return a tuple of the ancestors of common and the ancestors of heads
1894 that are not ancestors of common. In revset terminology, we return the
1932 that are not ancestors of common. In revset terminology, we return the
1895 tuple:
1933 tuple:
1896
1934
1897 ::common, (::heads) - (::common)
1935 ::common, (::heads) - (::common)
1898
1936
1899 The list is sorted by revision number, meaning it is
1937 The list is sorted by revision number, meaning it is
1900 topologically sorted.
1938 topologically sorted.
1901
1939
1902 'heads' and 'common' are both lists of node IDs. If heads is
1940 'heads' and 'common' are both lists of node IDs. If heads is
1903 not supplied, uses all of the revlog's heads. If common is not
1941 not supplied, uses all of the revlog's heads. If common is not
1904 supplied, uses nullid."""
1942 supplied, uses nullid."""
1905 if common is None:
1943 if common is None:
1906 common = [self.nullid]
1944 common = [self.nullid]
1907 if heads is None:
1945 if heads is None:
1908 heads = self.heads()
1946 heads = self.heads()
1909
1947
1910 common = [self.rev(n) for n in common]
1948 common = [self.rev(n) for n in common]
1911 heads = [self.rev(n) for n in heads]
1949 heads = [self.rev(n) for n in heads]
1912
1950
1913 # we want the ancestors, but inclusive
1951 # we want the ancestors, but inclusive
1914 class lazyset:
1952 class lazyset:
1915 def __init__(self, lazyvalues):
1953 def __init__(self, lazyvalues):
1916 self.addedvalues = set()
1954 self.addedvalues = set()
1917 self.lazyvalues = lazyvalues
1955 self.lazyvalues = lazyvalues
1918
1956
1919 def __contains__(self, value):
1957 def __contains__(self, value):
1920 return value in self.addedvalues or value in self.lazyvalues
1958 return value in self.addedvalues or value in self.lazyvalues
1921
1959
1922 def __iter__(self):
1960 def __iter__(self):
1923 added = self.addedvalues
1961 added = self.addedvalues
1924 for r in added:
1962 for r in added:
1925 yield r
1963 yield r
1926 for r in self.lazyvalues:
1964 for r in self.lazyvalues:
1927 if r not in added:
1965 if r not in added:
1928 yield r
1966 yield r
1929
1967
1930 def add(self, value):
1968 def add(self, value):
1931 self.addedvalues.add(value)
1969 self.addedvalues.add(value)
1932
1970
1933 def update(self, values):
1971 def update(self, values):
1934 self.addedvalues.update(values)
1972 self.addedvalues.update(values)
1935
1973
1936 has = lazyset(self.ancestors(common))
1974 has = lazyset(self.ancestors(common))
1937 has.add(nullrev)
1975 has.add(nullrev)
1938 has.update(common)
1976 has.update(common)
1939
1977
1940 # take all ancestors from heads that aren't in has
1978 # take all ancestors from heads that aren't in has
1941 missing = set()
1979 missing = set()
1942 visit = collections.deque(r for r in heads if r not in has)
1980 visit = collections.deque(r for r in heads if r not in has)
1943 while visit:
1981 while visit:
1944 r = visit.popleft()
1982 r = visit.popleft()
1945 if r in missing:
1983 if r in missing:
1946 continue
1984 continue
1947 else:
1985 else:
1948 missing.add(r)
1986 missing.add(r)
1949 for p in self.parentrevs(r):
1987 for p in self.parentrevs(r):
1950 if p not in has:
1988 if p not in has:
1951 visit.append(p)
1989 visit.append(p)
1952 missing = list(missing)
1990 missing = list(missing)
1953 missing.sort()
1991 missing.sort()
1954 return has, [self.node(miss) for miss in missing]
1992 return has, [self.node(miss) for miss in missing]
1955
1993
1956 def incrementalmissingrevs(self, common=None):
1994 def incrementalmissingrevs(self, common=None):
1957 """Return an object that can be used to incrementally compute the
1995 """Return an object that can be used to incrementally compute the
1958 revision numbers of the ancestors of arbitrary sets that are not
1996 revision numbers of the ancestors of arbitrary sets that are not
1959 ancestors of common. This is an ancestor.incrementalmissingancestors
1997 ancestors of common. This is an ancestor.incrementalmissingancestors
1960 object.
1998 object.
1961
1999
1962 'common' is a list of revision numbers. If common is not supplied, uses
2000 'common' is a list of revision numbers. If common is not supplied, uses
1963 nullrev.
2001 nullrev.
1964 """
2002 """
1965 if common is None:
2003 if common is None:
1966 common = [nullrev]
2004 common = [nullrev]
1967
2005
1968 if rustancestor is not None and self.index.rust_ext_compat:
2006 if rustancestor is not None and self.index.rust_ext_compat:
1969 return rustancestor.MissingAncestors(self.index, common)
2007 return rustancestor.MissingAncestors(self.index, common)
1970 return ancestor.incrementalmissingancestors(self.parentrevs, common)
2008 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1971
2009
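# The incremental pattern this object enables, as discovery drives it; the
# names ``commonrevs``, ``heads`` and ``newly_common`` are illustrative:
#
#     inc = rl.incrementalmissingrevs(common=commonrevs)
#     missing = inc.missingancestors(heads)   # first answer
#     inc.addbases(newly_common)              # learn more common revisions
#     missing = inc.missingancestors(heads)   # refined, smaller answer
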
1972 def findmissingrevs(self, common=None, heads=None):
2010 def findmissingrevs(self, common=None, heads=None):
1973 """Return the revision numbers of the ancestors of heads that
2011 """Return the revision numbers of the ancestors of heads that
1974 are not ancestors of common.
2012 are not ancestors of common.
1975
2013
1976 More specifically, return a list of revision numbers corresponding to
2014 More specifically, return a list of revision numbers corresponding to
1977 nodes N such that every N satisfies the following constraints:
2015 nodes N such that every N satisfies the following constraints:
1978
2016
1979 1. N is an ancestor of some node in 'heads'
2017 1. N is an ancestor of some node in 'heads'
1980 2. N is not an ancestor of any node in 'common'
2018 2. N is not an ancestor of any node in 'common'
1981
2019
1982 The list is sorted by revision number, meaning it is
2020 The list is sorted by revision number, meaning it is
1983 topologically sorted.
2021 topologically sorted.
1984
2022
1985 'heads' and 'common' are both lists of revision numbers. If heads is
2023 'heads' and 'common' are both lists of revision numbers. If heads is
1986 not supplied, uses all of the revlog's heads. If common is not
2024 not supplied, uses all of the revlog's heads. If common is not
1987 supplied, uses nullid."""
2025 supplied, uses nullid."""
1988 if common is None:
2026 if common is None:
1989 common = [nullrev]
2027 common = [nullrev]
1990 if heads is None:
2028 if heads is None:
1991 heads = self.headrevs()
2029 heads = self.headrevs()
1992
2030
1993 inc = self.incrementalmissingrevs(common=common)
2031 inc = self.incrementalmissingrevs(common=common)
1994 return inc.missingancestors(heads)
2032 return inc.missingancestors(heads)
1995
2033
1996 def findmissing(self, common=None, heads=None):
2034 def findmissing(self, common=None, heads=None):
1997 """Return the ancestors of heads that are not ancestors of common.
2035 """Return the ancestors of heads that are not ancestors of common.
1998
2036
1999 More specifically, return a list of nodes N such that every N
2037 More specifically, return a list of nodes N such that every N
2000 satisfies the following constraints:
2038 satisfies the following constraints:
2001
2039
2002 1. N is an ancestor of some node in 'heads'
2040 1. N is an ancestor of some node in 'heads'
2003 2. N is not an ancestor of any node in 'common'
2041 2. N is not an ancestor of any node in 'common'
2004
2042
2005 The list is sorted by revision number, meaning it is
2043 The list is sorted by revision number, meaning it is
2006 topologically sorted.
2044 topologically sorted.
2007
2045
2008 'heads' and 'common' are both lists of node IDs. If heads is
2046 'heads' and 'common' are both lists of node IDs. If heads is
2009 not supplied, uses all of the revlog's heads. If common is not
2047 not supplied, uses all of the revlog's heads. If common is not
2010 supplied, uses nullid."""
2048 supplied, uses nullid."""
2011 if common is None:
2049 if common is None:
2012 common = [self.nullid]
2050 common = [self.nullid]
2013 if heads is None:
2051 if heads is None:
2014 heads = self.heads()
2052 heads = self.heads()
2015
2053
2016 common = [self.rev(n) for n in common]
2054 common = [self.rev(n) for n in common]
2017 heads = [self.rev(n) for n in heads]
2055 heads = [self.rev(n) for n in heads]
2018
2056
2019 inc = self.incrementalmissingrevs(common=common)
2057 inc = self.incrementalmissingrevs(common=common)
2020 return [self.node(r) for r in inc.missingancestors(heads)]
2058 return [self.node(r) for r in inc.missingancestors(heads)]
2021
2059
2022 def nodesbetween(self, roots=None, heads=None):
2060 def nodesbetween(self, roots=None, heads=None):
2023 """Return a topological path from 'roots' to 'heads'.
2061 """Return a topological path from 'roots' to 'heads'.
2024
2062
2025 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2063 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2026 topologically sorted list of all nodes N that satisfy both of
2064 topologically sorted list of all nodes N that satisfy both of
2027 these constraints:
2065 these constraints:
2028
2066
2029 1. N is a descendant of some node in 'roots'
2067 1. N is a descendant of some node in 'roots'
2030 2. N is an ancestor of some node in 'heads'
2068 2. N is an ancestor of some node in 'heads'
2031
2069
2032 Every node is considered to be both a descendant and an ancestor
2070 Every node is considered to be both a descendant and an ancestor
2033 of itself, so every reachable node in 'roots' and 'heads' will be
2071 of itself, so every reachable node in 'roots' and 'heads' will be
2034 included in 'nodes'.
2072 included in 'nodes'.
2035
2073
2036 'outroots' is the list of reachable nodes in 'roots', i.e., the
2074 'outroots' is the list of reachable nodes in 'roots', i.e., the
2037 subset of 'roots' that is returned in 'nodes'. Likewise,
2075 subset of 'roots' that is returned in 'nodes'. Likewise,
2038 'outheads' is the subset of 'heads' that is also in 'nodes'.
2076 'outheads' is the subset of 'heads' that is also in 'nodes'.
2039
2077
2040 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2078 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2041 unspecified, uses nullid as the only root. If 'heads' is
2079 unspecified, uses nullid as the only root. If 'heads' is
2042 unspecified, uses list of all of the revlog's heads."""
2080 unspecified, uses list of all of the revlog's heads."""
2043 nonodes = ([], [], [])
2081 nonodes = ([], [], [])
2044 if roots is not None:
2082 if roots is not None:
2045 roots = list(roots)
2083 roots = list(roots)
2046 if not roots:
2084 if not roots:
2047 return nonodes
2085 return nonodes
2048 lowestrev = min([self.rev(n) for n in roots])
2086 lowestrev = min([self.rev(n) for n in roots])
2049 else:
2087 else:
2050 roots = [self.nullid] # Everybody's a descendant of nullid
2088 roots = [self.nullid] # Everybody's a descendant of nullid
2051 lowestrev = nullrev
2089 lowestrev = nullrev
2052 if (lowestrev == nullrev) and (heads is None):
2090 if (lowestrev == nullrev) and (heads is None):
2053 # We want _all_ the nodes!
2091 # We want _all_ the nodes!
2054 return (
2092 return (
2055 [self.node(r) for r in self],
2093 [self.node(r) for r in self],
2056 [self.nullid],
2094 [self.nullid],
2057 list(self.heads()),
2095 list(self.heads()),
2058 )
2096 )
2059 if heads is None:
2097 if heads is None:
2060 # All nodes are ancestors, so the latest ancestor is the last
2098 # All nodes are ancestors, so the latest ancestor is the last
2061 # node.
2099 # node.
2062 highestrev = len(self) - 1
2100 highestrev = len(self) - 1
2063 # Set ancestors to None to signal that every node is an ancestor.
2101 # Set ancestors to None to signal that every node is an ancestor.
2064 ancestors = None
2102 ancestors = None
2065 # Set heads to an empty dictionary for later discovery of heads
2103 # Set heads to an empty dictionary for later discovery of heads
2066 heads = {}
2104 heads = {}
2067 else:
2105 else:
2068 heads = list(heads)
2106 heads = list(heads)
2069 if not heads:
2107 if not heads:
2070 return nonodes
2108 return nonodes
2071 ancestors = set()
2109 ancestors = set()
2072 # Turn heads into a dictionary so we can remove 'fake' heads.
2110 # Turn heads into a dictionary so we can remove 'fake' heads.
2073 # Also, later we will be using it to filter out the heads we can't
2111 # Also, later we will be using it to filter out the heads we can't
2074 # find from roots.
2112 # find from roots.
2075 heads = dict.fromkeys(heads, False)
2113 heads = dict.fromkeys(heads, False)
2076 # Start at the top and keep marking parents until we're done.
2114 # Start at the top and keep marking parents until we're done.
2077 nodestotag = set(heads)
2115 nodestotag = set(heads)
2078 # Remember where the top was so we can use it as a limit later.
2116 # Remember where the top was so we can use it as a limit later.
2079 highestrev = max([self.rev(n) for n in nodestotag])
2117 highestrev = max([self.rev(n) for n in nodestotag])
2080 while nodestotag:
2118 while nodestotag:
2081 # grab a node to tag
2119 # grab a node to tag
2082 n = nodestotag.pop()
2120 n = nodestotag.pop()
2083 # Never tag nullid
2121 # Never tag nullid
2084 if n == self.nullid:
2122 if n == self.nullid:
2085 continue
2123 continue
2086 # A node's revision number represents its place in a
2124 # A node's revision number represents its place in a
2087 # topologically sorted list of nodes.
2125 # topologically sorted list of nodes.
2088 r = self.rev(n)
2126 r = self.rev(n)
2089 if r >= lowestrev:
2127 if r >= lowestrev:
2090 if n not in ancestors:
2128 if n not in ancestors:
2091 # If we are possibly a descendant of one of the roots
2129 # If we are possibly a descendant of one of the roots
2092 # and we haven't already been marked as an ancestor
2130 # and we haven't already been marked as an ancestor
2093 ancestors.add(n) # Mark as ancestor
2131 ancestors.add(n) # Mark as ancestor
2094 # Add non-nullid parents to list of nodes to tag.
2132 # Add non-nullid parents to list of nodes to tag.
2095 nodestotag.update(
2133 nodestotag.update(
2096 [p for p in self.parents(n) if p != self.nullid]
2134 [p for p in self.parents(n) if p != self.nullid]
2097 )
2135 )
2098 elif n in heads: # We've seen it before, is it a fake head?
2136 elif n in heads: # We've seen it before, is it a fake head?
2099 # So it is; real heads should not be the ancestors of
2137 # So it is; real heads should not be the ancestors of
2100 # any other heads.
2138 # any other heads.
2101 heads.pop(n)
2139 heads.pop(n)
2102 if not ancestors:
2140 if not ancestors:
2103 return nonodes
2141 return nonodes
2104 # Now that we have our set of ancestors, we want to remove any
2142 # Now that we have our set of ancestors, we want to remove any
2105 # roots that are not ancestors.
2143 # roots that are not ancestors.
2106
2144
2107 # If one of the roots was nullid, everything is included anyway.
2145 # If one of the roots was nullid, everything is included anyway.
2108 if lowestrev > nullrev:
2146 if lowestrev > nullrev:
2109 # But, since we weren't, let's recompute the lowest rev to not
2147 # But, since we weren't, let's recompute the lowest rev to not
2110 # include roots that aren't ancestors.
2148 # include roots that aren't ancestors.
2111
2149
2112 # Filter out roots that aren't ancestors of heads
2150 # Filter out roots that aren't ancestors of heads
2113 roots = [root for root in roots if root in ancestors]
2151 roots = [root for root in roots if root in ancestors]
2114 # Recompute the lowest revision
2152 # Recompute the lowest revision
2115 if roots:
2153 if roots:
2116 lowestrev = min([self.rev(root) for root in roots])
2154 lowestrev = min([self.rev(root) for root in roots])
2117 else:
2155 else:
2118 # No more roots? Return empty list
2156 # No more roots? Return empty list
2119 return nonodes
2157 return nonodes
2120 else:
2158 else:
2121 # We are descending from nullid, and don't need to care about
2159 # We are descending from nullid, and don't need to care about
2122 # any other roots.
2160 # any other roots.
2123 lowestrev = nullrev
2161 lowestrev = nullrev
2124 roots = [self.nullid]
2162 roots = [self.nullid]
2125 # Transform our roots list into a set.
2163 # Transform our roots list into a set.
2126 descendants = set(roots)
2164 descendants = set(roots)
2127 # Also, keep the original roots so we can filter out roots that aren't
2165 # Also, keep the original roots so we can filter out roots that aren't
2128 # 'real' roots (i.e. are descended from other roots).
2166 # 'real' roots (i.e. are descended from other roots).
2129 roots = descendants.copy()
2167 roots = descendants.copy()
2130 # Our topologically sorted list of output nodes.
2168 # Our topologically sorted list of output nodes.
2131 orderedout = []
2169 orderedout = []
2132 # Don't start at nullid since we don't want nullid in our output list,
2170 # Don't start at nullid since we don't want nullid in our output list,
2133 # and if nullid shows up in descendants, empty parents will look like
2171 # and if nullid shows up in descendants, empty parents will look like
2134 # they're descendants.
2172 # they're descendants.
2135 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2173 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2136 n = self.node(r)
2174 n = self.node(r)
2137 isdescendant = False
2175 isdescendant = False
2138 if lowestrev == nullrev: # Everybody is a descendant of nullid
2176 if lowestrev == nullrev: # Everybody is a descendant of nullid
2139 isdescendant = True
2177 isdescendant = True
2140 elif n in descendants:
2178 elif n in descendants:
2141 # n is already a descendant
2179 # n is already a descendant
2142 isdescendant = True
2180 isdescendant = True
2143 # This check only needs to be done here because all the roots
2181 # This check only needs to be done here because all the roots
2144 # will start being marked as descendants before the loop.
2182 # will start being marked as descendants before the loop.
2145 if n in roots:
2183 if n in roots:
2146 # If n was a root, check if it's a 'real' root.
2184 # If n was a root, check if it's a 'real' root.
2147 p = tuple(self.parents(n))
2185 p = tuple(self.parents(n))
2148 # If any of its parents are descendants, it's not a root.
2186 # If any of its parents are descendants, it's not a root.
2149 if (p[0] in descendants) or (p[1] in descendants):
2187 if (p[0] in descendants) or (p[1] in descendants):
2150 roots.remove(n)
2188 roots.remove(n)
2151 else:
2189 else:
2152 p = tuple(self.parents(n))
2190 p = tuple(self.parents(n))
2153 # A node is a descendant if either of its parents are
2191 # A node is a descendant if either of its parents are
2154 # descendants. (We seeded the descendants set with the roots
2192 # descendants. (We seeded the descendants set with the roots
2155 # up there, remember?)
2193 # up there, remember?)
2156 if (p[0] in descendants) or (p[1] in descendants):
2194 if (p[0] in descendants) or (p[1] in descendants):
2157 descendants.add(n)
2195 descendants.add(n)
2158 isdescendant = True
2196 isdescendant = True
2159 if isdescendant and ((ancestors is None) or (n in ancestors)):
2197 if isdescendant and ((ancestors is None) or (n in ancestors)):
2160 # Only include nodes that are both descendants and ancestors.
2198 # Only include nodes that are both descendants and ancestors.
2161 orderedout.append(n)
2199 orderedout.append(n)
2162 if (ancestors is not None) and (n in heads):
2200 if (ancestors is not None) and (n in heads):
2163 # We're trying to figure out which heads are reachable
2201 # We're trying to figure out which heads are reachable
2164 # from roots.
2202 # from roots.
2165 # Mark this head as having been reached
2203 # Mark this head as having been reached
2166 heads[n] = True
2204 heads[n] = True
2167 elif ancestors is None:
2205 elif ancestors is None:
2168 # Otherwise, we're trying to discover the heads.
2206 # Otherwise, we're trying to discover the heads.
2169 # Assume this is a head because if it isn't, the next step
2207 # Assume this is a head because if it isn't, the next step
2170 # will eventually remove it.
2208 # will eventually remove it.
2171 heads[n] = True
2209 heads[n] = True
2172 # But, obviously its parents aren't.
2210 # But, obviously its parents aren't.
2173 for p in self.parents(n):
2211 for p in self.parents(n):
2174 heads.pop(p, None)
2212 heads.pop(p, None)
2175 heads = [head for head, flag in heads.items() if flag]
2213 heads = [head for head, flag in heads.items() if flag]
2176 roots = list(roots)
2214 roots = list(roots)
2177 assert orderedout
2215 assert orderedout
2178 assert roots
2216 assert roots
2179 assert heads
2217 assert heads
2180 return (orderedout, roots, heads)
2218 return (orderedout, roots, heads)
2181
2219
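# ``nodesbetween`` in one call, assuming ``a`` and ``b`` are known node ids
# (illustrative):
#
#     nodes, outroots, outheads = rl.nodesbetween(roots=[a], heads=[b])
#     # nodes     == topologically sorted 'a::b'
#     # outroots  == the subset of [a] that made it into ``nodes``
#     # outheads  == the subset of [b] that made it into ``nodes``
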
2182 def headrevs(self, revs=None):
2220 def headrevs(self, revs=None):
2183 if revs is None:
2221 if revs is None:
2184 try:
2222 try:
2185 return self.index.headrevs()
2223 return self.index.headrevs()
2186 except AttributeError:
2224 except AttributeError:
2187 return self._headrevs()
2225 return self._headrevs()
2188 if rustdagop is not None and self.index.rust_ext_compat:
2226 if rustdagop is not None and self.index.rust_ext_compat:
2189 return rustdagop.headrevs(self.index, revs)
2227 return rustdagop.headrevs(self.index, revs)
2190 return dagop.headrevs(revs, self._uncheckedparentrevs)
2228 return dagop.headrevs(revs, self._uncheckedparentrevs)
2191
2229
2192 def computephases(self, roots):
2230 def computephases(self, roots):
2193 return self.index.computephasesmapsets(roots)
2231 return self.index.computephasesmapsets(roots)
2194
2232
2195 def _headrevs(self):
2233 def _headrevs(self):
2196 count = len(self)
2234 count = len(self)
2197 if not count:
2235 if not count:
2198 return [nullrev]
2236 return [nullrev]
2199 # we won't iterate over filtered revs, so nobody is a head at the start
2237 # we won't iterate over filtered revs, so nobody is a head at the start
2200 ishead = [0] * (count + 1)
2238 ishead = [0] * (count + 1)
2201 index = self.index
2239 index = self.index
2202 for r in self:
2240 for r in self:
2203 ishead[r] = 1 # I may be a head
2241 ishead[r] = 1 # I may be a head
2204 e = index[r]
2242 e = index[r]
2205 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
2243 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
2206 return [r for r, val in enumerate(ishead) if val]
2244 return [r for r, val in enumerate(ishead) if val]
2207
2245
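# The marking pass above on a toy three-revision log where rev 1 has parents
# (0, -1) and rev 2 has parents (1, -1); the extra trailing slot of ``ishead``
# absorbs the writes aimed at nullrev (-1):
#
#     ishead = [0, 0, 0, 0]
#     # r=0 sets ishead[0]=1; r=1 sets ishead[1]=1 and clears ishead[0];
#     # r=2 sets ishead[2]=1 and clears ishead[1]  ->  heads == [2]
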
2208 def heads(self, start=None, stop=None):
2246 def heads(self, start=None, stop=None):
2209 """return the list of all nodes that have no children
2247 """return the list of all nodes that have no children
2210
2248
2211 if start is specified, only heads that are descendants of
2249 if start is specified, only heads that are descendants of
2212 start will be returned
2250 start will be returned
2213 if stop is specified, it will consider all the revs from stop
2251 if stop is specified, it will consider all the revs from stop
2214 as if they had no children
2252 as if they had no children
2215 """
2253 """
2216 if start is None and stop is None:
2254 if start is None and stop is None:
2217 if not len(self):
2255 if not len(self):
2218 return [self.nullid]
2256 return [self.nullid]
2219 return [self.node(r) for r in self.headrevs()]
2257 return [self.node(r) for r in self.headrevs()]
2220
2258
2221 if start is None:
2259 if start is None:
2222 start = nullrev
2260 start = nullrev
2223 else:
2261 else:
2224 start = self.rev(start)
2262 start = self.rev(start)
2225
2263
2226 stoprevs = {self.rev(n) for n in stop or []}
2264 stoprevs = {self.rev(n) for n in stop or []}
2227
2265
2228 revs = dagop.headrevssubset(
2266 revs = dagop.headrevssubset(
2229 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2267 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2230 )
2268 )
2231
2269
2232 return [self.node(rev) for rev in revs]
2270 return [self.node(rev) for rev in revs]
2233
2271
2234 def children(self, node):
2272 def children(self, node):
2235 """find the children of a given node"""
2273 """find the children of a given node"""
2236 c = []
2274 c = []
2237 p = self.rev(node)
2275 p = self.rev(node)
2238 for r in self.revs(start=p + 1):
2276 for r in self.revs(start=p + 1):
2239 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2277 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2240 if prevs:
2278 if prevs:
2241 for pr in prevs:
2279 for pr in prevs:
2242 if pr == p:
2280 if pr == p:
2243 c.append(self.node(r))
2281 c.append(self.node(r))
2244 elif p == nullrev:
2282 elif p == nullrev:
2245 c.append(self.node(r))
2283 c.append(self.node(r))
2246 return c
2284 return c
2247
2285
2248 def commonancestorsheads(self, a, b):
2286 def commonancestorsheads(self, a, b):
2249 """calculate all the heads of the common ancestors of nodes a and b"""
2287 """calculate all the heads of the common ancestors of nodes a and b"""
2250 a, b = self.rev(a), self.rev(b)
2288 a, b = self.rev(a), self.rev(b)
2251 ancs = self._commonancestorsheads(a, b)
2289 ancs = self._commonancestorsheads(a, b)
2252 return pycompat.maplist(self.node, ancs)
2290 return pycompat.maplist(self.node, ancs)
2253
2291
2254 def _commonancestorsheads(self, *revs):
2292 def _commonancestorsheads(self, *revs):
2255 """calculate all the heads of the common ancestors of revs"""
2293 """calculate all the heads of the common ancestors of revs"""
2256 try:
2294 try:
2257 ancs = self.index.commonancestorsheads(*revs)
2295 ancs = self.index.commonancestorsheads(*revs)
2258 except (AttributeError, OverflowError): # C implementation failed
2296 except (AttributeError, OverflowError): # C implementation failed
2259 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2297 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2260 return ancs
2298 return ancs
2261
2299
2262 def isancestor(self, a, b):
2300 def isancestor(self, a, b):
2263 """return True if node a is an ancestor of node b
2301 """return True if node a is an ancestor of node b
2264
2302
2265 A revision is considered an ancestor of itself."""
2303 A revision is considered an ancestor of itself."""
2266 a, b = self.rev(a), self.rev(b)
2304 a, b = self.rev(a), self.rev(b)
2267 return self.isancestorrev(a, b)
2305 return self.isancestorrev(a, b)
2268
2306
2269 def isancestorrev(self, a, b):
2307 def isancestorrev(self, a, b):
2270 """return True if revision a is an ancestor of revision b
2308 """return True if revision a is an ancestor of revision b
2271
2309
2272 A revision is considered an ancestor of itself.
2310 A revision is considered an ancestor of itself.
2273
2311
2274 The implementation of this is trivial but the use of
2312 The implementation of this is trivial but the use of
2275 reachableroots is not."""
2313 reachableroots is not."""
2276 if a == nullrev:
2314 if a == nullrev:
2277 return True
2315 return True
2278 elif a == b:
2316 elif a == b:
2279 return True
2317 return True
2280 elif a > b:
2318 elif a > b:
2281 return False
2319 return False
2282 return bool(self.reachableroots(a, [b], [a], includepath=False))
2320 return bool(self.reachableroots(a, [b], [a], includepath=False))
2283
2321
2284 def reachableroots(self, minroot, heads, roots, includepath=False):
2322 def reachableroots(self, minroot, heads, roots, includepath=False):
2285 """return (heads(::(<roots> and <roots>::<heads>)))
2323 """return (heads(::(<roots> and <roots>::<heads>)))
2286
2324
2287 If includepath is True, return (<roots>::<heads>)."""
2325 If includepath is True, return (<roots>::<heads>)."""
2288 try:
2326 try:
2289 return self.index.reachableroots2(
2327 return self.index.reachableroots2(
2290 minroot, heads, roots, includepath
2328 minroot, heads, roots, includepath
2291 )
2329 )
2292 except AttributeError:
2330 except AttributeError:
2293 return dagop._reachablerootspure(
2331 return dagop._reachablerootspure(
2294 self.parentrevs, minroot, roots, heads, includepath
2332 self.parentrevs, minroot, roots, heads, includepath
2295 )
2333 )
2296
2334
2297 def ancestor(self, a, b):
2335 def ancestor(self, a, b):
2298 """calculate the "best" common ancestor of nodes a and b"""
2336 """calculate the "best" common ancestor of nodes a and b"""
2299
2337
2300 a, b = self.rev(a), self.rev(b)
2338 a, b = self.rev(a), self.rev(b)
2301 try:
2339 try:
2302 ancs = self.index.ancestors(a, b)
2340 ancs = self.index.ancestors(a, b)
2303 except (AttributeError, OverflowError):
2341 except (AttributeError, OverflowError):
2304 ancs = ancestor.ancestors(self.parentrevs, a, b)
2342 ancs = ancestor.ancestors(self.parentrevs, a, b)
2305 if ancs:
2343 if ancs:
2306 # choose a consistent winner when there's a tie
2344 # choose a consistent winner when there's a tie
2307 return min(map(self.node, ancs))
2345 return min(map(self.node, ancs))
2308 return self.nullid
2346 return self.nullid
2309
2347
2310 def _match(self, id):
2348 def _match(self, id):
2311 if isinstance(id, int):
2349 if isinstance(id, int):
2312 # rev
2350 # rev
2313 return self.node(id)
2351 return self.node(id)
2314 if len(id) == self.nodeconstants.nodelen:
2352 if len(id) == self.nodeconstants.nodelen:
2315 # possibly a binary node
2353 # possibly a binary node
2316 # odds of a binary node being all hex in ASCII are 1 in 10**25
2354 # odds of a binary node being all hex in ASCII are 1 in 10**25
2317 try:
2355 try:
2318 node = id
2356 node = id
2319 self.rev(node) # quick search the index
2357 self.rev(node) # quick search the index
2320 return node
2358 return node
2321 except error.LookupError:
2359 except error.LookupError:
2322 pass # may be partial hex id
2360 pass # may be partial hex id
2323 try:
2361 try:
2324 # str(rev)
2362 # str(rev)
2325 rev = int(id)
2363 rev = int(id)
2326 if b"%d" % rev != id:
2364 if b"%d" % rev != id:
2327 raise ValueError
2365 raise ValueError
2328 if rev < 0:
2366 if rev < 0:
2329 rev = len(self) + rev
2367 rev = len(self) + rev
2330 if rev < 0 or rev >= len(self):
2368 if rev < 0 or rev >= len(self):
2331 raise ValueError
2369 raise ValueError
2332 return self.node(rev)
2370 return self.node(rev)
2333 except (ValueError, OverflowError):
2371 except (ValueError, OverflowError):
2334 pass
2372 pass
2335 if len(id) == 2 * self.nodeconstants.nodelen:
2373 if len(id) == 2 * self.nodeconstants.nodelen:
2336 try:
2374 try:
2337 # a full hex nodeid?
2375 # a full hex nodeid?
2338 node = bin(id)
2376 node = bin(id)
2339 self.rev(node)
2377 self.rev(node)
2340 return node
2378 return node
2341 except (binascii.Error, error.LookupError):
2379 except (binascii.Error, error.LookupError):
2342 pass
2380 pass
2343
2381
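# The resolution order of ``_match``, sketched with illustrative inputs:
#
#     rl._match(5)          # int        -> node of rev 5
#     rl._match(node)       # 20-byte id -> node, if present in the index
#     rl._match(b'5')       # str(rev)   -> node of rev 5
#     rl._match(hex_id)     # full hex   -> bin(hex_id), if present
#     # anything else returns None and callers fall back to _partialmatch
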
2344 def _partialmatch(self, id):
2382 def _partialmatch(self, id):
2345 # we don't care about wdirfilenodeids as they should always be full hashes
2383 # we don't care about wdirfilenodeids as they should always be full hashes
2346 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2384 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2347 ambiguous = False
2385 ambiguous = False
2348 try:
2386 try:
2349 partial = self.index.partialmatch(id)
2387 partial = self.index.partialmatch(id)
2350 if partial and self.hasnode(partial):
2388 if partial and self.hasnode(partial):
2351 if maybewdir:
2389 if maybewdir:
2352 # single 'ff...' match in radix tree, ambiguous with wdir
2390 # single 'ff...' match in radix tree, ambiguous with wdir
2353 ambiguous = True
2391 ambiguous = True
2354 else:
2392 else:
2355 return partial
2393 return partial
2356 elif maybewdir:
2394 elif maybewdir:
2357 # no 'ff...' match in radix tree, wdir identified
2395 # no 'ff...' match in radix tree, wdir identified
2358 raise error.WdirUnsupported
2396 raise error.WdirUnsupported
2359 else:
2397 else:
2360 return None
2398 return None
2361 except error.RevlogError:
2399 except error.RevlogError:
2362 # parsers.c radix tree lookup gave multiple matches
2400 # parsers.c radix tree lookup gave multiple matches
2363 # fast path: for unfiltered changelog, radix tree is accurate
2401 # fast path: for unfiltered changelog, radix tree is accurate
2364 if not getattr(self, 'filteredrevs', None):
2402 if not getattr(self, 'filteredrevs', None):
2365 ambiguous = True
2403 ambiguous = True
2366 # fall through to slow path that filters hidden revisions
2404 # fall through to slow path that filters hidden revisions
2367 except (AttributeError, ValueError):
2405 except (AttributeError, ValueError):
2368 # we are pure python, or key is not hex
2406 # we are pure python, or key is not hex
2369 pass
2407 pass
2370 if ambiguous:
2408 if ambiguous:
2371 raise error.AmbiguousPrefixLookupError(
2409 raise error.AmbiguousPrefixLookupError(
2372 id, self.display_id, _(b'ambiguous identifier')
2410 id, self.display_id, _(b'ambiguous identifier')
2373 )
2411 )
2374
2412
2375 if id in self._pcache:
2413 if id in self._pcache:
2376 return self._pcache[id]
2414 return self._pcache[id]
2377
2415
2378 if len(id) <= 40:
2416 if len(id) <= 40:
2379 # hex(node)[:...]
2417 # hex(node)[:...]
2380 l = len(id) // 2 * 2 # grab an even number of digits
2418 l = len(id) // 2 * 2 # grab an even number of digits
2381 try:
2419 try:
2382 # we're dropping the last digit, so let's check that it's hex,
2420 # we're dropping the last digit, so let's check that it's hex,
2383 # to avoid the expensive computation below if it's not
2421 # to avoid the expensive computation below if it's not
2384 if len(id) % 2 > 0:
2422 if len(id) % 2 > 0:
2385 if id[-1] not in hexdigits:
2423 if id[-1] not in hexdigits:
2386 return None
2424 return None
2387 prefix = bin(id[:l])
2425 prefix = bin(id[:l])
2388 except binascii.Error:
2426 except binascii.Error:
2389 pass
2427 pass
2390 else:
2428 else:
2391 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2429 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2392 nl = [
2430 nl = [
2393 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2431 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2394 ]
2432 ]
2395 if self.nodeconstants.nullhex.startswith(id):
2433 if self.nodeconstants.nullhex.startswith(id):
2396 nl.append(self.nullid)
2434 nl.append(self.nullid)
2397 if len(nl) > 0:
2435 if len(nl) > 0:
2398 if len(nl) == 1 and not maybewdir:
2436 if len(nl) == 1 and not maybewdir:
2399 self._pcache[id] = nl[0]
2437 self._pcache[id] = nl[0]
2400 return nl[0]
2438 return nl[0]
2401 raise error.AmbiguousPrefixLookupError(
2439 raise error.AmbiguousPrefixLookupError(
2402 id, self.display_id, _(b'ambiguous identifier')
2440 id, self.display_id, _(b'ambiguous identifier')
2403 )
2441 )
2404 if maybewdir:
2442 if maybewdir:
2405 raise error.WdirUnsupported
2443 raise error.WdirUnsupported
2406 return None
2444 return None
2407
2445
2408 def lookup(self, id):
2446 def lookup(self, id):
2409 """locate a node based on:
2447 """locate a node based on:
2410 - revision number or str(revision number)
2448 - revision number or str(revision number)
2411 - nodeid or subset of hex nodeid
2449 - nodeid or subset of hex nodeid
2412 """
2450 """
2413 n = self._match(id)
2451 n = self._match(id)
2414 if n is not None:
2452 if n is not None:
2415 return n
2453 return n
2416 n = self._partialmatch(id)
2454 n = self._partialmatch(id)
2417 if n:
2455 if n:
2418 return n
2456 return n
2419
2457
2420 raise error.LookupError(id, self.display_id, _(b'no match found'))
2458 raise error.LookupError(id, self.display_id, _(b'no match found'))
2421
2459
2422 def shortest(self, node, minlength=1):
2460 def shortest(self, node, minlength=1):
2423 """Find the shortest unambiguous prefix that matches node."""
2461 """Find the shortest unambiguous prefix that matches node."""
2424
2462
2425 def isvalid(prefix):
2463 def isvalid(prefix):
2426 try:
2464 try:
2427 matchednode = self._partialmatch(prefix)
2465 matchednode = self._partialmatch(prefix)
2428 except error.AmbiguousPrefixLookupError:
2466 except error.AmbiguousPrefixLookupError:
2429 return False
2467 return False
2430 except error.WdirUnsupported:
2468 except error.WdirUnsupported:
2431 # single 'ff...' match
2469 # single 'ff...' match
2432 return True
2470 return True
2433 if matchednode is None:
2471 if matchednode is None:
2434 raise error.LookupError(node, self.display_id, _(b'no node'))
2472 raise error.LookupError(node, self.display_id, _(b'no node'))
2435 return True
2473 return True
2436
2474
2437 def maybewdir(prefix):
2475 def maybewdir(prefix):
2438 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2476 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2439
2477
2440 hexnode = hex(node)
2478 hexnode = hex(node)
2441
2479
2442 def disambiguate(hexnode, minlength):
2480 def disambiguate(hexnode, minlength):
2443 """Disambiguate against wdirid."""
2481 """Disambiguate against wdirid."""
2444 for length in range(minlength, len(hexnode) + 1):
2482 for length in range(minlength, len(hexnode) + 1):
2445 prefix = hexnode[:length]
2483 prefix = hexnode[:length]
2446 if not maybewdir(prefix):
2484 if not maybewdir(prefix):
2447 return prefix
2485 return prefix
2448
2486
2449 if not getattr(self, 'filteredrevs', None):
2487 if not getattr(self, 'filteredrevs', None):
2450 try:
2488 try:
2451 length = max(self.index.shortest(node), minlength)
2489 length = max(self.index.shortest(node), minlength)
2452 return disambiguate(hexnode, length)
2490 return disambiguate(hexnode, length)
2453 except error.RevlogError:
2491 except error.RevlogError:
2454 if node != self.nodeconstants.wdirid:
2492 if node != self.nodeconstants.wdirid:
2455 raise error.LookupError(
2493 raise error.LookupError(
2456 node, self.display_id, _(b'no node')
2494 node, self.display_id, _(b'no node')
2457 )
2495 )
2458 except AttributeError:
2496 except AttributeError:
2459 # Fall through to pure code
2497 # Fall through to pure code
2460 pass
2498 pass
2461
2499
2462 if node == self.nodeconstants.wdirid:
2500 if node == self.nodeconstants.wdirid:
2463 for length in range(minlength, len(hexnode) + 1):
2501 for length in range(minlength, len(hexnode) + 1):
2464 prefix = hexnode[:length]
2502 prefix = hexnode[:length]
2465 if isvalid(prefix):
2503 if isvalid(prefix):
2466 return prefix
2504 return prefix
2467
2505
2468 for length in range(minlength, len(hexnode) + 1):
2506 for length in range(minlength, len(hexnode) + 1):
2469 prefix = hexnode[:length]
2507 prefix = hexnode[:length]
2470 if isvalid(prefix):
2508 if isvalid(prefix):
2471 return disambiguate(hexnode, length)
2509 return disambiguate(hexnode, length)
2472
2510
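# A hedged usage sketch for ``shortest`` (``rl`` and ``node`` illustrative):
#
#     prefix = rl.shortest(node, minlength=1)
#     # by construction the prefix is unambiguous, so resolving it round-trips:
#     assert rl._partialmatch(prefix) == node
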
2473 def cmp(self, node, text):
2511 def cmp(self, node, text):
2474 """compare text with a given file revision
2512 """compare text with a given file revision
2475
2513
2476 returns True if text is different from what is stored.
2514 returns True if text is different from what is stored.
2477 """
2515 """
2478 p1, p2 = self.parents(node)
2516 p1, p2 = self.parents(node)
2479 return storageutil.hashrevisionsha1(text, p1, p2) != node
2517 return storageutil.hashrevisionsha1(text, p1, p2) != node
2480
2518
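# ``cmp`` never materialises the stored text: it hashes the candidate text
# with the stored parents and compares node ids. Sketch (names illustrative):
#
#     if rl.cmp(node, newtext):
#         ...  # newtext differs from what revision ``node`` stores
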
2481 def deltaparent(self, rev):
2519 def deltaparent(self, rev):
2482 """return deltaparent of the given revision"""
2520 """return deltaparent of the given revision"""
2483 base = self.index[rev][3]
2521 base = self.index[rev][3]
2484 if base == rev:
2522 if base == rev:
2485 return nullrev
2523 return nullrev
2486 elif self.delta_config.general_delta:
2524 elif self.delta_config.general_delta:
2487 return base
2525 return base
2488 else:
2526 else:
2489 return rev - 1
2527 return rev - 1
2490
2528
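# ``deltaparent`` under the two storage layouts, with illustrative values:
#
#     base = rl.deltaparent(rev)
#     # nullrev        -> rev is stored as a full snapshot
#     # general delta  -> the base recorded in index[rev][3]
#     # legacy layout  -> always rev - 1
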
2491 def issnapshot(self, rev):
2529 def issnapshot(self, rev):
2492 """tells whether rev is a snapshot"""
2530 """tells whether rev is a snapshot"""
2493 ret = self._inner.issnapshot(rev)
2531 ret = self._inner.issnapshot(rev)
2494 self.issnapshot = self._inner.issnapshot
2532 self.issnapshot = self._inner.issnapshot
2495 return ret
2533 return ret
2496
2534
2497 def snapshotdepth(self, rev):
2535 def snapshotdepth(self, rev):
2498 """number of snapshot in the chain before this one"""
2536 """number of snapshot in the chain before this one"""
2499 if not self.issnapshot(rev):
2537 if not self.issnapshot(rev):
2500 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2538 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2501 return len(self._inner._deltachain(rev)[0]) - 1
2539 return len(self._inner._deltachain(rev)[0]) - 1
2502
2540
2503 def revdiff(self, rev1, rev2):
2541 def revdiff(self, rev1, rev2):
2504 """return or calculate a delta between two revisions
2542 """return or calculate a delta between two revisions
2505
2543
2506 The delta calculated is in binary form and is intended to be written to
2544 The delta calculated is in binary form and is intended to be written to
2507 revlog data directly. So this function needs raw revision data.
2545 revlog data directly. So this function needs raw revision data.
2508 """
2546 """
2509 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2547 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2510 return bytes(self._inner._chunk(rev2))
2548 return bytes(self._inner._chunk(rev2))
2511
2549
2512 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2550 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2513
2551
2514 def revision(self, nodeorrev):
2552 def revision(self, nodeorrev):
2515 """return an uncompressed revision of a given node or revision
2553 """return an uncompressed revision of a given node or revision
2516 number.
2554 number.
2517 """
2555 """
2518 return self._revisiondata(nodeorrev)
2556 return self._revisiondata(nodeorrev)
2519
2557
2520 def sidedata(self, nodeorrev):
2558 def sidedata(self, nodeorrev):
2521 """a map of extra data related to the changeset but not part of the hash
2559 """a map of extra data related to the changeset but not part of the hash
2522
2560
2523 This function currently returns a dictionary. However, a more advanced
2561 This function currently returns a dictionary. However, a more advanced
2524 mapping object will likely be used in the future for more
2562 mapping object will likely be used in the future for more
2525 efficient/lazy code.
2563 efficient/lazy code.
2526 """
2564 """
2527 # deal with <nodeorrev> argument type
2565 # deal with <nodeorrev> argument type
2528 if isinstance(nodeorrev, int):
2566 if isinstance(nodeorrev, int):
2529 rev = nodeorrev
2567 rev = nodeorrev
2530 else:
2568 else:
2531 rev = self.rev(nodeorrev)
2569 rev = self.rev(nodeorrev)
2532 return self._sidedata(rev)
2570 return self._sidedata(rev)
2533
2571
2572 def _rawtext(self, node, rev):
2573 """return the possibly unvalidated rawtext for a revision
2574
2575 returns (rev, rawtext, validated)
2576 """
2577 # Check if we have the entry in cache
2578 # The cache entry looks like (node, rev, rawtext)
2579 if self._inner._revisioncache:
2580 if self._inner._revisioncache[0] == node:
2581 return (rev, self._inner._revisioncache[2], True)
2582
2583 if rev is None:
2584 rev = self.rev(node)
2585
2586 return self._inner.raw_text(node, rev)
2587
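# The cache consulted above is a single ``(node, rev, rawtext)`` triple; a
# sketch of the fast path, with illustrative values:
#
#     self._inner._revisioncache = (node, 5, rawtext)
#     self._rawtext(node, None)   # -> (None, rawtext, True), no disk access
#     # a None rev is fine here: _revisiondata resolves it later if needed
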
2534 def _revisiondata(self, nodeorrev, raw=False):
2588 def _revisiondata(self, nodeorrev, raw=False):
2535 # deal with <nodeorrev> argument type
2589 # deal with <nodeorrev> argument type
2536 if isinstance(nodeorrev, int):
2590 if isinstance(nodeorrev, int):
2537 rev = nodeorrev
2591 rev = nodeorrev
2538 node = self.node(rev)
2592 node = self.node(rev)
2539 else:
2593 else:
2540 node = nodeorrev
2594 node = nodeorrev
2541 rev = None
2595 rev = None
2542
2596
2543 # fast path the special `nullid` rev
2597 # fast path the special `nullid` rev
2544 if node == self.nullid:
2598 if node == self.nullid:
2545 return b""
2599 return b""
2546
2600
2547 # ``rawtext`` is the text as stored inside the revlog. Might be the
2601 # ``rawtext`` is the text as stored inside the revlog. Might be the
2548 # revision or might need to be processed to retrieve the revision.
2602 # revision or might need to be processed to retrieve the revision.
2549 rev, rawtext, validated = self._rawtext(node, rev)
2603 rev, rawtext, validated = self._rawtext(node, rev)
2550
2604
2551 if raw and validated:
2605 if raw and validated:
2552 # if we don't want to process the raw text and that raw
2606 # if we don't want to process the raw text and that raw
2553 # text is cached, we can exit early.
2607 # text is cached, we can exit early.
2554 return rawtext
2608 return rawtext
2555 if rev is None:
2609 if rev is None:
2556 rev = self.rev(node)
2610 rev = self.rev(node)
2557 # the revlog's flag for this revision
2611 # the revlog's flag for this revision
2558 # (usually alter its state or content)
2612 # (usually alter its state or content)
2559 flags = self.flags(rev)
2613 flags = self.flags(rev)
2560
2614
2561 if validated and flags == REVIDX_DEFAULT_FLAGS:
2615 if validated and flags == REVIDX_DEFAULT_FLAGS:
2562 # no extra flags set, no flag processor runs, text = rawtext
2616 # no extra flags set, no flag processor runs, text = rawtext
2563 return rawtext
2617 return rawtext
2564
2618
2565 if raw:
2619 if raw:
2566 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2620 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2567 text = rawtext
2621 text = rawtext
2568 else:
2622 else:
2569 r = flagutil.processflagsread(self, rawtext, flags)
2623 r = flagutil.processflagsread(self, rawtext, flags)
2570 text, validatehash = r
2624 text, validatehash = r
2571 if validatehash:
2625 if validatehash:
2572 self.checkhash(text, node, rev=rev)
2626 self.checkhash(text, node, rev=rev)
2573 if not validated:
2627 if not validated:
2574 self._inner._revisioncache = (node, rev, rawtext)
2628 self._inner._revisioncache = (node, rev, rawtext)
2575
2629
2576 return text
2630 return text
2577
2631
2578 def _rawtext(self, node, rev):
2579 """return the possibly unvalidated rawtext for a revision
2580
2581 returns (rev, rawtext, validated)
2582 """
2583
2584 # revision in the cache (could be useful to apply delta)
2585 cachedrev = None
2586 # An intermediate text to apply deltas to
2587 basetext = None
2588
2589 # Check if we have the entry in cache
2590 # The cache entry looks like (node, rev, rawtext)
2591 if self._inner._revisioncache:
2592 if self._inner._revisioncache[0] == node:
2593 return (rev, self._inner._revisioncache[2], True)
2594 cachedrev = self._inner._revisioncache[1]
2595
2596 if rev is None:
2597 rev = self.rev(node)
2598
2599 chain, stopped = self._inner._deltachain(rev, stoprev=cachedrev)
2600 if stopped:
2601 basetext = self._inner._revisioncache[2]
2602
2603 # drop cache to save memory, the caller is expected to
2604 # update self._inner._revisioncache after validating the text
2605 self._inner._revisioncache = None
2606
2607 targetsize = None
2608 rawsize = self.index[rev][2]
2609 if 0 <= rawsize:
2610 targetsize = 4 * rawsize
2611
2612 bins = self._inner._chunks(chain, targetsize=targetsize)
2613 if basetext is None:
2614 basetext = bytes(bins[0])
2615 bins = bins[1:]
2616
2617 rawtext = mdiff.patches(basetext, bins)
2618 del basetext # let us have a chance to free memory early
2619 return (rev, rawtext, False)
2620
2621 def _sidedata(self, rev):
2632 def _sidedata(self, rev):
2622 """Return the sidedata for a given revision number."""
2633 """Return the sidedata for a given revision number."""
2623 index_entry = self.index[rev]
2634 index_entry = self.index[rev]
2624 sidedata_offset = index_entry[8]
2635 sidedata_offset = index_entry[8]
2625 sidedata_size = index_entry[9]
2636 sidedata_size = index_entry[9]
2626
2637
2627 if self._inline:
2638 if self._inline:
2628 sidedata_offset += self.index.entry_size * (1 + rev)
2639 sidedata_offset += self.index.entry_size * (1 + rev)
2629 if sidedata_size == 0:
2640 if sidedata_size == 0:
2630 return {}
2641 return {}
2631
2642
2632 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2643 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2633 filename = self._sidedatafile
2644 filename = self._sidedatafile
2634 end = self._docket.sidedata_end
2645 end = self._docket.sidedata_end
2635 offset = sidedata_offset
2646 offset = sidedata_offset
2636 length = sidedata_size
2647 length = sidedata_size
2637 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2648 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2638 raise error.RevlogError(m)
2649 raise error.RevlogError(m)
2639
2650
2640 comp_segment = self._inner._segmentfile_sidedata.read_chunk(
2651 comp_segment = self._inner._segmentfile_sidedata.read_chunk(
2641 sidedata_offset, sidedata_size
2652 sidedata_offset, sidedata_size
2642 )
2653 )
2643
2654
2644 comp = self.index[rev][11]
2655 comp = self.index[rev][11]
2645 if comp == COMP_MODE_PLAIN:
2656 if comp == COMP_MODE_PLAIN:
2646 segment = comp_segment
2657 segment = comp_segment
2647 elif comp == COMP_MODE_DEFAULT:
2658 elif comp == COMP_MODE_DEFAULT:
2648 segment = self._inner._decompressor(comp_segment)
2659 segment = self._inner._decompressor(comp_segment)
2649 elif comp == COMP_MODE_INLINE:
2660 elif comp == COMP_MODE_INLINE:
2650 segment = self._inner.decompress(comp_segment)
2661 segment = self._inner.decompress(comp_segment)
2651 else:
2662 else:
2652 msg = b'unknown compression mode %d'
2663 msg = b'unknown compression mode %d'
2653 msg %= comp
2664 msg %= comp
2654 raise error.RevlogError(msg)
2665 raise error.RevlogError(msg)
2655
2666
2656 sidedata = sidedatautil.deserialize_sidedata(segment)
2667 sidedata = sidedatautil.deserialize_sidedata(segment)
2657 return sidedata
2668 return sidedata
2658
2669
2659 def rawdata(self, nodeorrev):
2670 def rawdata(self, nodeorrev):
2660 """return an uncompressed raw data of a given node or revision number."""
2671 """return an uncompressed raw data of a given node or revision number."""
2661 return self._revisiondata(nodeorrev, raw=True)
2672 return self._revisiondata(nodeorrev, raw=True)
2662
2673
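# ``revision`` and ``rawdata`` differ only in flag processing; a sketch,
# assuming ``rl`` is an open revlog:
#
#     rl.revision(node)   # text after read-transform flag processors ran
#     rl.rawdata(node)    # bytes exactly as stored inside the revlog
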
2663 def hash(self, text, p1, p2):
2674 def hash(self, text, p1, p2):
2664 """Compute a node hash.
2675 """Compute a node hash.
2665
2676
2666 Available as a function so that subclasses can replace the hash
2677 Available as a function so that subclasses can replace the hash
2667 as needed.
2678 as needed.
2668 """
2679 """
2669 return storageutil.hashrevisionsha1(text, p1, p2)
2680 return storageutil.hashrevisionsha1(text, p1, p2)
2670
2681
2671 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2682 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2672 """Check node hash integrity.
2683 """Check node hash integrity.
2673
2684
2674 Available as a function so that subclasses can extend hash mismatch
2685 Available as a function so that subclasses can extend hash mismatch
2675 behaviors as needed.
2686 behaviors as needed.
2676 """
2687 """
2677 try:
2688 try:
2678 if p1 is None and p2 is None:
2689 if p1 is None and p2 is None:
2679 p1, p2 = self.parents(node)
2690 p1, p2 = self.parents(node)
2680 if node != self.hash(text, p1, p2):
2691 if node != self.hash(text, p1, p2):
2681 # Clear the revision cache on hash failure. The revision cache
2692 # Clear the revision cache on hash failure. The revision cache
2682 # only stores the raw revision and clearing the cache does have
2693 # only stores the raw revision and clearing the cache does have
2683 # the side-effect that we won't have a cache hit when the raw
2694 # the side-effect that we won't have a cache hit when the raw
2684 # revision data is accessed. But this case should be rare and
2695 # revision data is accessed. But this case should be rare and
2685 # it is extra work to teach the cache about the hash
2696 # it is extra work to teach the cache about the hash
2686 # verification state.
2697 # verification state.
2687 if (
2698 if (
2688 self._inner._revisioncache
2699 self._inner._revisioncache
2689 and self._inner._revisioncache[0] == node
2700 and self._inner._revisioncache[0] == node
2690 ):
2701 ):
2691 self._inner._revisioncache = None
2702 self._inner._revisioncache = None
2692
2703
2693 revornode = rev
2704 revornode = rev
2694 if revornode is None:
2705 if revornode is None:
2695 revornode = templatefilters.short(hex(node))
2706 revornode = templatefilters.short(hex(node))
2696 raise error.RevlogError(
2707 raise error.RevlogError(
2697 _(b"integrity check failed on %s:%s")
2708 _(b"integrity check failed on %s:%s")
2698 % (self.display_id, pycompat.bytestr(revornode))
2709 % (self.display_id, pycompat.bytestr(revornode))
2699 )
2710 )
2700 except error.RevlogError:
2711 except error.RevlogError:
2701 if self.feature_config.censorable and storageutil.iscensoredtext(
2712 if self.feature_config.censorable and storageutil.iscensoredtext(
2702 text
2713 text
2703 ):
2714 ):
2704 raise error.CensoredNodeError(self.display_id, node, text)
2715 raise error.CensoredNodeError(self.display_id, node, text)
2705 raise
2716 raise
2706
2717
2707 @property
2718 @property
2708 def _split_index_file(self):
2719 def _split_index_file(self):
2709 """the path where to expect the index of an ongoing splitting operation
2720 """the path where to expect the index of an ongoing splitting operation
2710
2721
2711 The file will only exist if a splitting operation is in progress, but
2722 The file will only exist if a splitting operation is in progress, but
2712 it is always expected at the same location."""
2723 it is always expected at the same location."""
2713 parts = self.radix.split(b'/')
2724 parts = self.radix.split(b'/')
2714 if len(parts) > 1:
2725 if len(parts) > 1:
2715 # adds a '-s' prefix to the ``data/`` or ``meta/`` base
2726 # adds a '-s' prefix to the ``data/`` or ``meta/`` base
2716 head = parts[0] + b'-s'
2727 head = parts[0] + b'-s'
2717 mids = parts[1:-1]
2728 mids = parts[1:-1]
2718 tail = parts[-1] + b'.i'
2729 tail = parts[-1] + b'.i'
2719 pieces = [head] + mids + [tail]
2730 pieces = [head] + mids + [tail]
2720 return b'/'.join(pieces)
2731 return b'/'.join(pieces)
2721 else:
2732 else:
2722 # the revlog is stored at the root of the store (changelog or
2733 # the revlog is stored at the root of the store (changelog or
2723 # manifest), no risk of collision.
2734 # manifest), no risk of collision.
2724 return self.radix + b'.i.s'
2735 return self.radix + b'.i.s'
2725
2736
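    # Example (derived from the code above): a filelog with radix
    # ``data/some/file`` stages its split index at ``data-s/some/file.i``,
    # while a changelog with radix ``00changelog`` uses ``00changelog.i.s``.
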
    def _enforceinlinesize(self, tr, side_write=True):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        if self._docket is not None:
            msg = b"inline revlog should not have a docket"
            raise error.ProgrammingError(msg)

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        if troffset:
            tr.addbackup(self._indexfile, for_offset=True)
        tr.add(self._datafile, 0)

        new_index_file_path = None
        if side_write:
            old_index_file_path = self._indexfile
            new_index_file_path = self._split_index_file
            opener = self.opener
            weak_self = weakref.ref(self)

            # the "split" index replaces the real index when the transaction
            # is finalized
            def finalize_callback(tr):
                opener.rename(
                    new_index_file_path,
                    old_index_file_path,
                    checkambig=True,
                )
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path
                    maybe_self._inner.index_file = maybe_self._indexfile

            def abort_callback(tr):
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path
                    maybe_self._inner.inline = True
                    maybe_self._inner.index_file = old_index_file_path

            tr.registertmp(new_index_file_path)
            if self.target[1] is not None:
                callback_id = b'000-revlog-split-%d-%s' % self.target
            else:
                callback_id = b'000-revlog-split-%d' % self.target[0]
            tr.addfinalize(callback_id, finalize_callback)
            tr.addabort(callback_id, abort_callback)

        self._format_flags &= ~FLAG_INLINE_DATA
        self._inner.split_inline(
            tr,
            self._format_flags | self._format_version,
            new_index_file_path=new_index_file_path,
        )

        self._inline = False
        if new_index_file_path is not None:
            self._indexfile = new_index_file_path

        nodemaputil.setup_persistent_nodemap(tr, self)

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        with self._inner.reading():
            yield

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._inner.is_writing:
            yield
        else:
            data_end = None
            sidedata_end = None
            if self._docket is not None:
                data_end = self._docket.data_end
                sidedata_end = self._docket.sidedata_end
            with self._inner.writing(
                transaction,
                data_end=data_end,
                sidedata_end=sidedata_end,
            ):
                yield
                if self._docket is not None:
                    self._write_docket(transaction)

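    # Note: ``_writing`` is effectively re-entrant; a nested call just yields
    # because ``is_writing`` reports that the inner object already holds open
    # write handles. Sketch of the pattern callers rely on (hypothetical
    # names):
    #
    #   with rl._writing(tr):
    #       with rl._writing(tr):  # no-op, handles are already open
    #           ...
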
    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement its transaction
        logic.

        We could also imagine using the same transaction logic for all
        revlogs, since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.feature_config.has_side_data:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

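    # Typical usage sketch (hypothetical caller code): revisions are added
    # inside a transaction so a failure can be rolled back:
    #
    #   with repo.transaction(b'example') as tr:
    #       rev = fl.addrevision(b'file content', tr, linkrev, p1, p2)
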
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents

        useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        return self._inner.compress(data)

    def decompress(self, data):
        return self._inner.decompress(data)

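    # Note: ``compress`` returns a ``(header, data)`` pair; a b'u' header
    # marks data stored uncompressed, while an empty header means the payload
    # identifies its own compression engine (see how the sidedata handling in
    # ``_addrevision`` below inspects the returned header).
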
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._inner._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._inner._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self.delta_config.debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        if cachedelta is not None and len(cachedelta) == 2:
            # If the cached delta has no information about how it should be
            # reused, add the default reuse instruction according to the
            # revlog's configuration.
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse = DELTA_BASE_REUSE_NO
            cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.feature_config.has_side_data:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self._inner.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self.feature_config.compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

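        # In other words, the "rank" of a revision is the number of its
        # ancestors, itself included: 1 for a root, parent-rank + 1 along a
        # linear chain, and for merges the rank of the higher parent plus the
        # revisions reachable only from the other parent.
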
        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._inner._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.

        Versions < 2 of the revlog can get this in O(1); revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._inner._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._inner._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._inner._writinghandles[0]
            wh2 = self._inner._writinghandles[1]
            wh3 = self._inner._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self.delta_config.debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

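    # Each entry in ``deltas`` is expected to be an 8-tuple of
    # ``(node, p1, p2, linknode, deltabase, delta, flags, sidedata)``,
    # matching the unpacking above; e.g. a changegroup producer yields one
    # such tuple per revision it transmits.
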
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self.feature_config.censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self.feature_config.censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._inner._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._inner._segmentfile.clear_cache()
        self._inner._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

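    # Interpretation sketch: non-zero ``dd``/``di`` values mean the data or
    # index file extends past (or stops short of) what the index entries
    # account for, e.g. trailing bytes left by an interrupted write;
    # integrity checkers treat anything other than (0, 0) as damage to
    # report.
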
    def files(self):
        """return list of files that compose this revlog"""
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self.delta_config.general_delta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self._candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

3551 def clone(
3562 def clone(
3552 self,
3563 self,
3553 tr,
3564 tr,
3554 destrevlog,
3565 destrevlog,
3555 addrevisioncb=None,
3566 addrevisioncb=None,
3556 deltareuse=DELTAREUSESAMEREVS,
3567 deltareuse=DELTAREUSESAMEREVS,
3557 forcedeltabothparents=None,
3568 forcedeltabothparents=None,
3558 sidedata_helpers=None,
3569 sidedata_helpers=None,
3559 ):
3570 ):
3560 """Copy this revlog to another, possibly with format changes.
3571 """Copy this revlog to another, possibly with format changes.
3561
3572
3562 The destination revlog will contain the same revisions and nodes.
3573 The destination revlog will contain the same revisions and nodes.
3563 However, it may not be bit-for-bit identical due to e.g. delta encoding
3574 However, it may not be bit-for-bit identical due to e.g. delta encoding
3564 differences.
3575 differences.
3565
3576
3566 The ``deltareuse`` argument control how deltas from the existing revlog
3577 The ``deltareuse`` argument control how deltas from the existing revlog
3567 are preserved in the destination revlog. The argument can have the
3578 are preserved in the destination revlog. The argument can have the
3568 following values:
3579 following values:
3569
3580
3570 DELTAREUSEALWAYS
3581 DELTAREUSEALWAYS
3571 Deltas will always be reused (if possible), even if the destination
3582 Deltas will always be reused (if possible), even if the destination
3572 revlog would not select the same revisions for the delta. This is the
3583 revlog would not select the same revisions for the delta. This is the
3573 fastest mode of operation.
3584 fastest mode of operation.
3574 DELTAREUSESAMEREVS
3585 DELTAREUSESAMEREVS
3575 Deltas will be reused if the destination revlog would pick the same
3586 Deltas will be reused if the destination revlog would pick the same
3576 revisions for the delta. This mode strikes a balance between speed
3587 revisions for the delta. This mode strikes a balance between speed
3577 and optimization.
3588 and optimization.
3578 DELTAREUSENEVER
3589 DELTAREUSENEVER
3579 Deltas will never be reused. This is the slowest mode of execution.
3590 Deltas will never be reused. This is the slowest mode of execution.
3580 This mode can be used to recompute deltas (e.g. if the diff/delta
3591 This mode can be used to recompute deltas (e.g. if the diff/delta
3581 algorithm changes).
3592 algorithm changes).
3582 DELTAREUSEFULLADD
3593 DELTAREUSEFULLADD
3583 Revision will be re-added as if their were new content. This is
3594 Revision will be re-added as if their were new content. This is
3584 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
3595 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
3585 eg: large file detection and handling.
3596 eg: large file detection and handling.
3586
3597
        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision. (A commented sketch of how each policy maps onto the
        destination's delta configuration follows this method.)

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. If not set, the destination revlog's existing
        configuration is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazy_delta and lazy_delta_base control whether to reuse a cached
        # delta, if possible.
        old_delta_config = destrevlog.delta_config
        destrevlog.delta_config = destrevlog.delta_config.copy()

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog.delta_config.lazy_delta_base = True
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = False

            delta_both_parents = (
                forcedeltabothparents or old_delta_config.delta_both_parents
            )
            destrevlog.delta_config.delta_both_parents = delta_both_parents

            with self.reading(), destrevlog._writing(tr):
                self._clone(
                    tr,
                    destrevlog,
                    addrevisioncb,
                    deltareuse,
                    forcedeltabothparents,
                    sidedata_helpers,
                )

        finally:
            destrevlog.delta_config = old_delta_config

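    # Commented sketch of the three public delta-reuse policies, as configured
    # in ``clone`` above. This is illustrative only; ``src``, ``dst`` and
    # ``tr`` are hypothetical stand-ins for an existing source revlog, an
    # empty destination revlog and an open transaction:
    #
    #     # reuse every delta and its base verbatim (fastest):
    #     src.clone(tr, dst, deltareuse=src.DELTAREUSEALWAYS)
    #
    #     # reuse a delta only when its base is a parent of the revision,
    #     # otherwise recompute it (the default trade-off):
    #     src.clone(tr, dst, deltareuse=src.DELTAREUSESAMEREVS)
    #
    #     # recompute every delta from the full text (slowest, best packing):
    #     src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)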
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self.delta_config.debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog.delta_config.lazy_delta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._inner._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    try:
                        rawtext = self._revisiondata(rev)
                    except error.CensoredNodeError as censored:
                        assert flags & REVIDX_ISCENSORED
                        rawtext = censored.tombstone
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

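    # For reference when reading ``_clone`` above: it works from the raw index
    # entry rather than higher-level accessors, so filtered revisions cannot
    # alter what gets copied. A sketch of the fields it consumes (only the
    # positions used above, not a full description of the entry tuple):
    #
    #     entry = index[rev]
    #     flags = entry[0] & 0xFFFF   # low 16 bits of the offset/flags field
    #     linkrev = entry[4]          # changelog revision this entry links to
    #     p1 = index[entry[5]][7]     # node of the first parent
    #     p2 = index[entry[6]][7]     # node of the second parent
    #     node = entry[7]             # node of the revision itself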
    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

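    # A minimal censoring sketch (hypothetical names: ``rl`` is a filelog-like
    # revlog, ``tr`` an open transaction; the tombstone replaces the raw text
    # of the censored revision):
    #
    #     rl.censorrevision(tr, rl.node(rev), tombstone=b'removed by admin')
    #
    # REVLOGV0 cannot be censored at all; V1 and V2 take the separate rewrite
    # paths (``rewrite.v1_censor`` vs ``rewrite.v2_censor``) seen above.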
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta:   file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #   ext:    content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text" mentioned
            # below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).
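            # A concrete (illustrative) instance of the table above, for the
            # "rename" case: if the raw text is
            #     b'\x01\ncopy: a\ncopyrev: <40 hex chars>\n\x01\nhello\n'
            # then LM is the length of everything up to and including the
            # second b'\x01\n' marker, L1 == L2 == LM + 6, and
            # len(read()) == L2 - LM == 6 (just b'hello\n').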

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

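    # Callers opt into each (potentially expensive) piece of information
    # explicitly. A hypothetical query for revision count and on-disk size
    # (``rl`` standing in for a revlog instance):
    #
    #     info = rl.storageinfo(revisionscount=True, storedsize=True)
    #     # -> {b'revisionscount': ..., b'storedsize': ...}
    #
    # Note that ``trackedsize`` sums the uncompressed rawsize of every
    # revision while ``storedsize`` stats the revlog's files on disk, so the
    # two can differ substantially on well-deltified revlogs.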
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.feature_config.has_side_data:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._inner._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.feature_config.has_side_data:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self._inner.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
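    # How ``rewrite_sidedata`` picks a sidedata compression mode, restated as
    # an illustrative sketch mirroring the logic above:
    #
    #     COMP_MODE_PLAIN   -> stored uncompressed (compression did not help)
    #     COMP_MODE_DEFAULT -> compressed with the revlog's default engine,
    #                          so no per-chunk header byte is needed
    #     COMP_MODE_INLINE  -> compressed, with the engine identified by the
    #                          chunk's own header byte
    #
    # A chunk is kept compressed only when the compressor did not fall back
    # to storing the text uncompressed (``h != b'u'``) and the compressed
    # form is actually smaller than the serialized sidedata.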