revlog: synchronise the various attributes holding the index filename...
marmoute
r51982:c2c24b6b default
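The change in this hunk replaces `_InnerRevlog`'s bare `index_file` attribute with a private `__index_file` behind a property, so that assigning a new index filename also updates the filename used by the inline segment file. Below is a minimal, self-contained sketch of that pattern, not the real implementation: `SegmentFile` is a hypothetical stand-in for the `randomaccessfile` object seen in the diff, and the constructor is heavily simplified; only the property/setter shape mirrors the actual change.

    class SegmentFile:
        """Toy stand-in for the random-access file wrapper."""

        def __init__(self, filename):
            self.filename = filename


    class InnerRevlog:
        def __init__(self, index_file, inline):
            self.__index_file = index_file
            self.inline = inline
            # For an inline revlog, revision data lives in the index file
            # itself, so the segment file is keyed on the same filename.
            self._segmentfile = SegmentFile(index_file)

        @property
        def index_file(self):
            return self.__index_file

        @index_file.setter
        def index_file(self, new_index_file):
            # Routing every rename through the setter keeps both holders of
            # the filename synchronised; a bare attribute assignment could
            # silently leave the segment file pointing at the old name.
            self.__index_file = new_index_file
            if self.inline:
                self._segmentfile.filename = new_index_file


    # Hypothetical usage: adopting a new name for the index file, e.g. when
    # a pending file is finalised.
    rl = InnerRevlog(b'00changelog.i.a', inline=True)
    rl.index_file = b'00changelog.i'
    assert rl._segmentfile.filename == b'00changelog.i'

For an inline revlog the data segments live in the index file itself, so the segment reader is keyed on the same filename; the setter is what keeps the two copies of that name from drifting apart.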
@@ -1,3907 +1,3917 @@
 # revlog.py - storage back-end for mercurial
 # coding: utf8
 #
 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

 """Storage back-end for Mercurial.

 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches.
 """


 import binascii
 import collections
 import contextlib
 import io
 import os
 import struct
 import weakref
 import zlib

 # import stuff from node for others to import from revlog
 from .node import (
     bin,
     hex,
     nullrev,
     sha1nodeconstants,
     short,
     wdirrev,
 )
 from .i18n import _
 from .revlogutils.constants import (
     ALL_KINDS,
     CHANGELOGV2,
     COMP_MODE_DEFAULT,
     COMP_MODE_INLINE,
     COMP_MODE_PLAIN,
     DELTA_BASE_REUSE_NO,
     DELTA_BASE_REUSE_TRY,
     ENTRY_RANK,
     FEATURES_BY_VERSION,
     FLAG_GENERALDELTA,
     FLAG_INLINE_DATA,
     INDEX_HEADER,
     KIND_CHANGELOG,
     KIND_FILELOG,
     RANK_UNKNOWN,
     REVLOGV0,
     REVLOGV1,
     REVLOGV1_FLAGS,
     REVLOGV2,
     REVLOGV2_FLAGS,
     REVLOG_DEFAULT_FLAGS,
     REVLOG_DEFAULT_FORMAT,
     REVLOG_DEFAULT_VERSION,
     SUPPORTED_FLAGS,
 )
 from .revlogutils.flagutil import (
     REVIDX_DEFAULT_FLAGS,
     REVIDX_ELLIPSIS,
     REVIDX_EXTSTORED,
     REVIDX_FLAGS_ORDER,
     REVIDX_HASCOPIESINFO,
     REVIDX_ISCENSORED,
     REVIDX_RAWTEXT_CHANGING_FLAGS,
 )
 from .thirdparty import attr
 from . import (
     ancestor,
     dagop,
     error,
     mdiff,
     policy,
     pycompat,
     revlogutils,
     templatefilters,
     util,
 )
 from .interfaces import (
     repository,
     util as interfaceutil,
 )
 from .revlogutils import (
     deltas as deltautil,
     docket as docketutil,
     flagutil,
     nodemap as nodemaputil,
     randomaccessfile,
     revlogv0,
     rewrite,
     sidedata as sidedatautil,
 )
 from .utils import (
     storageutil,
     stringutil,
 )

 # blanked usage of all the names to prevent pyflakes constraints
 # We need these names available in the module for extensions.

 REVLOGV0
 REVLOGV1
 REVLOGV2
 CHANGELOGV2
 FLAG_INLINE_DATA
 FLAG_GENERALDELTA
 REVLOG_DEFAULT_FLAGS
 REVLOG_DEFAULT_FORMAT
 REVLOG_DEFAULT_VERSION
 REVLOGV1_FLAGS
 REVLOGV2_FLAGS
 REVIDX_ISCENSORED
 REVIDX_ELLIPSIS
 REVIDX_HASCOPIESINFO
 REVIDX_EXTSTORED
 REVIDX_DEFAULT_FLAGS
 REVIDX_FLAGS_ORDER
 REVIDX_RAWTEXT_CHANGING_FLAGS

 parsers = policy.importmod('parsers')
 rustancestor = policy.importrust('ancestor')
 rustdagop = policy.importrust('dagop')
 rustrevlog = policy.importrust('revlog')

 # Aliased for performance.
 _zlibdecompress = zlib.decompress

 # max size of inline data embedded into a revlog
 _maxinline = 131072

 # Flag processors for REVIDX_ELLIPSIS.
 def ellipsisreadprocessor(rl, text):
     return text, False


 def ellipsiswriteprocessor(rl, text):
     return text, False


 def ellipsisrawprocessor(rl, text):
     return False


 ellipsisprocessor = (
     ellipsisreadprocessor,
     ellipsiswriteprocessor,
     ellipsisrawprocessor,
 )


 def _verify_revision(rl, skipflags, state, node):
     """Verify the integrity of the given revlog ``node`` while providing a hook
     point for extensions to influence the operation."""
     if skipflags:
         state[b'skipread'].add(node)
     else:
         # Side-effect: read content and verify hash.
         rl.revision(node)


 # True if a fast implementation for persistent-nodemap is available
 #
 # We also consider we have a "fast" implementation in "pure" python because
 # people using pure don't really have performance considerations (and a
 # wheelbarrow of other slowness sources)
 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
     parsers, 'BaseIndexObject'
 )


 @interfaceutil.implementer(repository.irevisiondelta)
 @attr.s(slots=True)
 class revlogrevisiondelta:
     node = attr.ib()
     p1node = attr.ib()
     p2node = attr.ib()
     basenode = attr.ib()
     flags = attr.ib()
     baserevisionsize = attr.ib()
     revision = attr.ib()
     delta = attr.ib()
     sidedata = attr.ib()
     protocol_flags = attr.ib()
     linknode = attr.ib(default=None)


 @interfaceutil.implementer(repository.iverifyproblem)
 @attr.s(frozen=True)
 class revlogproblem:
     warning = attr.ib(default=None)
     error = attr.ib(default=None)
     node = attr.ib(default=None)


 def parse_index_v1(data, inline):
     # call the C implementation to parse the index data
     index, cache = parsers.parse_index2(data, inline)
     return index, cache


 def parse_index_v2(data, inline):
     # call the C implementation to parse the index data
     index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
     return index, cache


 def parse_index_cl_v2(data, inline):
     # call the C implementation to parse the index data
     index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
     return index, cache


 if hasattr(parsers, 'parse_index_devel_nodemap'):

     def parse_index_v1_nodemap(data, inline):
         index, cache = parsers.parse_index_devel_nodemap(data, inline)
         return index, cache


 else:
     parse_index_v1_nodemap = None


 def parse_index_v1_mixed(data, inline):
     index, cache = parse_index_v1(data, inline)
     return rustrevlog.MixedIndex(index), cache


 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
 # signed integer)
 _maxentrysize = 0x7FFFFFFF

 FILE_TOO_SHORT_MSG = _(
     b'cannot read from revlog %s;'
     b' expected %d bytes from offset %d, data size is %d'
 )

 hexdigits = b'0123456789abcdefABCDEF'


 class _Config:
     def copy(self):
         return self.__class__(**self.__dict__)


 @attr.s()
 class FeatureConfig(_Config):
     """Hold configuration values about the available revlog features"""

     # the default compression engine
     compression_engine = attr.ib(default=b'zlib')
     # compression engines options
     compression_engine_options = attr.ib(default=attr.Factory(dict))

     # can we use censor on this revlog
     censorable = attr.ib(default=False)
     # does this revlog use the "side data" feature
     has_side_data = attr.ib(default=False)
     # might remove rank configuration once the computation has no impact
     compute_rank = attr.ib(default=False)
     # parent order is supposed to be semantically irrelevant, so we
     # normally resort parents to ensure that the first parent is non-null,
     # if there is a non-null parent at all.
     # filelog abuses the parent order as flag to mark some instances of
     # meta-encoded files, so allow it to disable this behavior.
     canonical_parent_order = attr.ib(default=False)
     # can ellipsis commit be used
     enable_ellipsis = attr.ib(default=False)

     def copy(self):
         new = super().copy()
         new.compression_engine_options = self.compression_engine_options.copy()
         return new


 @attr.s()
 class DataConfig(_Config):
281 """Hold configuration value about how the revlog data are read"""
281 """Hold configuration value about how the revlog data are read"""
282
282
283 # should we try to open the "pending" version of the revlog
283 # should we try to open the "pending" version of the revlog
284 try_pending = attr.ib(default=False)
284 try_pending = attr.ib(default=False)
285 # should we try to open the "splitted" version of the revlog
285 # should we try to open the "splitted" version of the revlog
286 try_split = attr.ib(default=False)
286 try_split = attr.ib(default=False)
287 # When True, indexfile should be opened with checkambig=True at writing,
287 # When True, indexfile should be opened with checkambig=True at writing,
288 # to avoid file stat ambiguity.
288 # to avoid file stat ambiguity.
289 check_ambig = attr.ib(default=False)
289 check_ambig = attr.ib(default=False)
290
290
291 # If true, use mmap instead of reading to deal with large index
291 # If true, use mmap instead of reading to deal with large index
292 mmap_large_index = attr.ib(default=False)
292 mmap_large_index = attr.ib(default=False)
293 # how much data is large
293 # how much data is large
294 mmap_index_threshold = attr.ib(default=None)
294 mmap_index_threshold = attr.ib(default=None)
295 # How much data to read and cache into the raw revlog data cache.
295 # How much data to read and cache into the raw revlog data cache.
296 chunk_cache_size = attr.ib(default=65536)
296 chunk_cache_size = attr.ib(default=65536)
297
297
298 # Allow sparse reading of the revlog data
298 # Allow sparse reading of the revlog data
299 with_sparse_read = attr.ib(default=False)
299 with_sparse_read = attr.ib(default=False)
300 # minimal density of a sparse read chunk
300 # minimal density of a sparse read chunk
301 sr_density_threshold = attr.ib(default=0.50)
301 sr_density_threshold = attr.ib(default=0.50)
302 # minimal size of data we skip when performing sparse read
302 # minimal size of data we skip when performing sparse read
303 sr_min_gap_size = attr.ib(default=262144)
303 sr_min_gap_size = attr.ib(default=262144)
304
304
305 # are delta encoded against arbitrary bases.
305 # are delta encoded against arbitrary bases.
306 generaldelta = attr.ib(default=False)
306 generaldelta = attr.ib(default=False)
307
307
308
308
309 @attr.s()
309 @attr.s()
310 class DeltaConfig(_Config):
310 class DeltaConfig(_Config):
311 """Hold configuration value about how new delta are computed
311 """Hold configuration value about how new delta are computed
312
312
313 Some attributes are duplicated from DataConfig to help havign each object
313 Some attributes are duplicated from DataConfig to help havign each object
314 self contained.
314 self contained.
315 """
315 """

     # can delta be encoded against arbitrary bases.
     general_delta = attr.ib(default=False)
     # Allow sparse writing of the revlog data
     sparse_revlog = attr.ib(default=False)
     # maximum length of a delta chain
     max_chain_len = attr.ib(default=None)
     # Maximum distance between delta chain base start and end
     max_deltachain_span = attr.ib(default=-1)
     # If `upper_bound_comp` is not None, this is the expected maximal gain from
     # compression for the data content.
     upper_bound_comp = attr.ib(default=None)
     # Should we try a delta against both parents
     delta_both_parents = attr.ib(default=True)
     # Test delta base candidate group by chunk of this maximal size.
     candidate_group_chunk_size = attr.ib(default=0)
     # Should we display debug information about delta computation
     debug_delta = attr.ib(default=False)
     # trust incoming delta by default
     lazy_delta = attr.ib(default=True)
     # trust the base of incoming delta by default
     lazy_delta_base = attr.ib(default=False)


 class _InnerRevlog:
     """An inner layer of the revlog object

     That layer exists to be able to delegate some operations to Rust; its
     boundaries are arbitrary and based on what we can delegate to Rust.
345 """
345 """
346
346
347 def __init__(
347 def __init__(
348 self,
348 self,
349 opener,
349 opener,
350 index,
350 index,
351 index_file,
351 index_file,
352 data_file,
352 data_file,
353 sidedata_file,
353 sidedata_file,
354 inline,
354 inline,
355 data_config,
355 data_config,
356 chunk_cache,
356 chunk_cache,
357 ):
357 ):
358 self.opener = opener
358 self.opener = opener
359 self.index = index
359 self.index = index
360
360
-        self.index_file = index_file
+        self.__index_file = index_file
         self.data_file = data_file
         self.sidedata_file = sidedata_file
         self.inline = inline
         self.data_config = data_config

         # index

         # 3-tuple of file handles being used for active writing.
         self._writinghandles = None

         self._segmentfile = randomaccessfile.randomaccessfile(
             self.opener,
             (self.index_file if self.inline else self.data_file),
             self.data_config.chunk_cache_size,
             chunk_cache,
         )
         self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
             self.opener,
             self.sidedata_file,
             self.data_config.chunk_cache_size,
         )

+    @property
+    def index_file(self):
+        return self.__index_file
+
+    @index_file.setter
+    def index_file(self, new_index_file):
+        self.__index_file = new_index_file
+        if self.inline:
+            self._segmentfile.filename = new_index_file
+
     # Derived from index values.

     def start(self, rev):
         """the offset of the data chunk for this revision"""
         return int(self.index[rev][0] >> 16)

     def length(self, rev):
         """the length of the data chunk for this revision"""
         return self.index[rev][1]

     def end(self, rev):
         """the end of the data chunk for this revision"""
         return self.start(rev) + self.length(rev)

     @contextlib.contextmanager
     def reading(self):
         """Context manager that keeps data and sidedata files open for reading"""
         if len(self.index) == 0:
             yield  # nothing to be read
         else:
             with self._segmentfile.reading():
                 with self._segmentfile_sidedata.reading():
                     yield

     @property
     def is_writing(self):
410 """True is a writing context is open"""
420 """True is a writing context is open"""
         return self._writinghandles is not None

     @contextlib.contextmanager
     def writing(self, transaction, data_end=None, sidedata_end=None):
         """Open the revlog files for writing

         Adding content to a revlog should be done within such a context.
418 """
428 """
419 if self.is_writing:
429 if self.is_writing:
420 yield
430 yield
421 else:
431 else:
422 ifh = dfh = sdfh = None
432 ifh = dfh = sdfh = None
423 try:
433 try:
424 r = len(self.index)
434 r = len(self.index)
425 # opening the data file.
435 # opening the data file.
426 dsize = 0
436 dsize = 0
427 if r:
437 if r:
428 dsize = self.end(r - 1)
438 dsize = self.end(r - 1)
429 dfh = None
439 dfh = None
430 if not self.inline:
440 if not self.inline:
431 try:
441 try:
432 dfh = self.opener(self.data_file, mode=b"r+")
442 dfh = self.opener(self.data_file, mode=b"r+")
433 if data_end is None:
443 if data_end is None:
434 dfh.seek(0, os.SEEK_END)
444 dfh.seek(0, os.SEEK_END)
435 else:
445 else:
436 dfh.seek(data_end, os.SEEK_SET)
446 dfh.seek(data_end, os.SEEK_SET)
437 except FileNotFoundError:
447 except FileNotFoundError:
438 dfh = self.opener(self.data_file, mode=b"w+")
448 dfh = self.opener(self.data_file, mode=b"w+")
439 transaction.add(self.data_file, dsize)
449 transaction.add(self.data_file, dsize)
440 if self.sidedata_file is not None:
450 if self.sidedata_file is not None:
441 assert sidedata_end is not None
451 assert sidedata_end is not None
442 # revlog-v2 does not inline, help Pytype
452 # revlog-v2 does not inline, help Pytype
443 assert dfh is not None
453 assert dfh is not None
444 try:
454 try:
445 sdfh = self.opener(self.sidedata_file, mode=b"r+")
455 sdfh = self.opener(self.sidedata_file, mode=b"r+")
446 dfh.seek(sidedata_end, os.SEEK_SET)
456 dfh.seek(sidedata_end, os.SEEK_SET)
447 except FileNotFoundError:
457 except FileNotFoundError:
448 sdfh = self.opener(self.sidedata_file, mode=b"w+")
458 sdfh = self.opener(self.sidedata_file, mode=b"w+")
449 transaction.add(self.sidedata_file, sidedata_end)
459 transaction.add(self.sidedata_file, sidedata_end)
450
460
451 # opening the index file.
461 # opening the index file.
452 isize = r * self.index.entry_size
462 isize = r * self.index.entry_size
453 ifh = self.__index_write_fp()
463 ifh = self.__index_write_fp()
454 if self.inline:
464 if self.inline:
455 transaction.add(self.index_file, dsize + isize)
465 transaction.add(self.index_file, dsize + isize)
456 else:
466 else:
457 transaction.add(self.index_file, isize)
467 transaction.add(self.index_file, isize)
                 # exposing all file handles for writing.
                 self._writinghandles = (ifh, dfh, sdfh)
                 self._segmentfile.writing_handle = ifh if self.inline else dfh
                 self._segmentfile_sidedata.writing_handle = sdfh
                 yield
             finally:
                 self._writinghandles = None
                 self._segmentfile.writing_handle = None
                 self._segmentfile_sidedata.writing_handle = None
                 if dfh is not None:
                     dfh.close()
                 if sdfh is not None:
                     sdfh.close()
                 # closing the index file last to avoid exposing references to
                 # potentially unflushed data content.
                 if ifh is not None:
                     ifh.close()

     def __index_write_fp(self, index_end=None):
         """internal method to open the index file for writing

         You should not use this directly and use `_writing` instead
         """
         try:
             f = self.opener(
                 self.index_file,
                 mode=b"r+",
                 checkambig=self.data_config.check_ambig,
             )
             if index_end is None:
                 f.seek(0, os.SEEK_END)
             else:
                 f.seek(index_end, os.SEEK_SET)
             return f
         except FileNotFoundError:
             return self.opener(
                 self.index_file,
                 mode=b"w+",
                 checkambig=self.data_config.check_ambig,
             )

     def __index_new_fp(self):
         """internal method to create a new index file for writing

         You should not use this unless you are upgrading from inline revlog
         """
         return self.opener(
             self.index_file,
             mode=b"w",
             checkambig=self.data_config.check_ambig,
             atomictemp=True,
         )

     def get_segment_for_revs(self, startrev, endrev):
         """Obtain a segment of raw data corresponding to a range of revisions.

         Accepts the start and end revisions and an optional already-open
         file handle to be used for reading. If the file handle is read, its
         seek position will not be preserved.

         Requests for data may be satisfied by a cache.

         Returns a 2-tuple of (offset, data) for the requested range of
         revisions. Offset is the integer offset from the beginning of the
         revlog and data is a str or buffer of the raw byte data.

         Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
         to determine where each revision's data begins and ends.

         API: we should consider making this a private part of the InnerRevlog
         at some point.
         """
         # Inlined self.start(startrev) & self.end(endrev) for perf reasons
         # (functions are expensive).
         index = self.index
         istart = index[startrev]
         start = int(istart[0] >> 16)
         if startrev == endrev:
             end = start + istart[1]
         else:
             iend = index[endrev]
             end = int(iend[0] >> 16) + iend[1]

         if self.inline:
             start += (startrev + 1) * self.index.entry_size
             end += (endrev + 1) * self.index.entry_size
         length = end - start

         return start, self._segmentfile.read_chunk(start, length)


 class revlog:
     """
     the underlying revision storage object

     A revlog consists of two parts, an index and the revision data.

     The index is a file with a fixed record size containing
     information on each revision, including its nodeid (hash), the
     nodeids of its parents, the position and offset of its data within
     the data file, and the revision it's based on. Finally, each entry
     contains a linkrev entry that can serve as a pointer to external
     data.

     The revision data itself is a linear collection of data chunks.
     Each chunk represents a revision and is usually represented as a
     delta against the previous chunk. To bound lookup time, runs of
     deltas are limited to about 2 times the length of the original
     version data. This makes retrieval of a version proportional to
     its size, or O(1) relative to the number of revisions.

     Both pieces of the revlog are written to in an append-only
     fashion, which means we never need to rewrite a file to insert or
     remove data, and can use some simple techniques to avoid the need
     for locking while reading.

     If checkambig, indexfile is opened with checkambig=True at
     writing, to avoid file stat ambiguity.

     If mmaplargeindex is True, and an mmapindexthreshold is set, the
     index will be mmapped rather than read if it is larger than the
     configured threshold.

     If censorable is True, the revlog can have censored revisions.

     If `upperboundcomp` is not None, this is the expected maximal gain from
     compression for the data content.

     `concurrencychecker` is an optional function that receives 3 arguments: a
     file handle, a filename, and an expected position. It should check whether
     the current position in the file handle is valid, and log/warn/fail (by
     raising).

     See mercurial/revlogutils/constants.py for details about the content of an
     index entry.
593 """
603 """
594
604
595 _flagserrorclass = error.RevlogError
605 _flagserrorclass = error.RevlogError
596
606
597 @staticmethod
607 @staticmethod
598 def is_inline_index(header_bytes):
608 def is_inline_index(header_bytes):
599 """Determine if a revlog is inline from the initial bytes of the index"""
609 """Determine if a revlog is inline from the initial bytes of the index"""
600 header = INDEX_HEADER.unpack(header_bytes)[0]
610 header = INDEX_HEADER.unpack(header_bytes)[0]
601
611
602 _format_flags = header & ~0xFFFF
612 _format_flags = header & ~0xFFFF
603 _format_version = header & 0xFFFF
613 _format_version = header & 0xFFFF
604
614
605 features = FEATURES_BY_VERSION[_format_version]
615 features = FEATURES_BY_VERSION[_format_version]
606 return features[b'inline'](_format_flags)
616 return features[b'inline'](_format_flags)
607
617
608 def __init__(
618 def __init__(
609 self,
619 self,
610 opener,
620 opener,
611 target,
621 target,
612 radix,
622 radix,
613 postfix=None, # only exist for `tmpcensored` now
623 postfix=None, # only exist for `tmpcensored` now
614 checkambig=False,
624 checkambig=False,
615 mmaplargeindex=False,
625 mmaplargeindex=False,
616 censorable=False,
626 censorable=False,
617 upperboundcomp=None,
627 upperboundcomp=None,
618 persistentnodemap=False,
628 persistentnodemap=False,
619 concurrencychecker=None,
629 concurrencychecker=None,
620 trypending=False,
630 trypending=False,
621 try_split=False,
631 try_split=False,
622 canonical_parent_order=True,
632 canonical_parent_order=True,
623 ):
633 ):
624 """
634 """
625 create a revlog object
635 create a revlog object
626
636
627 opener is a function that abstracts the file opening operation
637 opener is a function that abstracts the file opening operation
628 and can be used to implement COW semantics or the like.
638 and can be used to implement COW semantics or the like.
629
639
         `target`: a (KIND, ID) tuple that identifies the content stored in
         this revlog. It helps the rest of the code to understand what the
         revlog is about without having to resort to heuristics and index
         filename analysis. Note that this must reliably be set by normal
         code, but test, debug, or performance measurement code might not
         set this to an accurate value.
636 """
646 """
637
647
638 self.radix = radix
648 self.radix = radix
639
649
640 self._docket_file = None
650 self._docket_file = None
641 self._indexfile = None
651 self._indexfile = None
642 self._datafile = None
652 self._datafile = None
643 self._sidedatafile = None
653 self._sidedatafile = None
644 self._nodemap_file = None
654 self._nodemap_file = None
645 self.postfix = postfix
655 self.postfix = postfix
646 self._trypending = trypending
656 self._trypending = trypending
647 self._try_split = try_split
657 self._try_split = try_split
648 self.opener = opener
658 self.opener = opener
649 if persistentnodemap:
659 if persistentnodemap:
650 self._nodemap_file = nodemaputil.get_nodemap_file(self)
660 self._nodemap_file = nodemaputil.get_nodemap_file(self)
651
661
652 assert target[0] in ALL_KINDS
662 assert target[0] in ALL_KINDS
653 assert len(target) == 2
663 assert len(target) == 2
654 self.target = target
664 self.target = target
655 if b'feature-config' in self.opener.options:
665 if b'feature-config' in self.opener.options:
656 self.feature_config = self.opener.options[b'feature-config'].copy()
666 self.feature_config = self.opener.options[b'feature-config'].copy()
657 else:
667 else:
658 self.feature_config = FeatureConfig()
668 self.feature_config = FeatureConfig()
659 self.feature_config.censorable = censorable
669 self.feature_config.censorable = censorable
660 self.feature_config.canonical_parent_order = canonical_parent_order
670 self.feature_config.canonical_parent_order = canonical_parent_order
661 if b'data-config' in self.opener.options:
671 if b'data-config' in self.opener.options:
662 self.data_config = self.opener.options[b'data-config'].copy()
672 self.data_config = self.opener.options[b'data-config'].copy()
663 else:
673 else:
664 self.data_config = DataConfig()
674 self.data_config = DataConfig()
665 self.data_config.check_ambig = checkambig
675 self.data_config.check_ambig = checkambig
666 self.data_config.mmap_large_index = mmaplargeindex
676 self.data_config.mmap_large_index = mmaplargeindex
667 if b'delta-config' in self.opener.options:
677 if b'delta-config' in self.opener.options:
668 self.delta_config = self.opener.options[b'delta-config'].copy()
678 self.delta_config = self.opener.options[b'delta-config'].copy()
669 else:
679 else:
670 self.delta_config = DeltaConfig()
680 self.delta_config = DeltaConfig()
671 self.delta_config.upper_bound_comp = upperboundcomp
681 self.delta_config.upper_bound_comp = upperboundcomp
672
682
673 # 3-tuple of (node, rev, text) for a raw revision.
683 # 3-tuple of (node, rev, text) for a raw revision.
674 self._revisioncache = None
684 self._revisioncache = None
675 # Maps rev to chain base rev.
685 # Maps rev to chain base rev.
676 self._chainbasecache = util.lrucachedict(100)
686 self._chainbasecache = util.lrucachedict(100)
677
687
678 self.index = None
688 self.index = None
679 self._docket = None
689 self._docket = None
680 self._nodemap_docket = None
690 self._nodemap_docket = None
681 # Mapping of partial identifiers to full nodes.
691 # Mapping of partial identifiers to full nodes.
682 self._pcache = {}
692 self._pcache = {}
683
693
         # other optional features

         # Make copy of flag processors so each revlog instance can support
         # custom flags.
         self._flagprocessors = dict(flagutil.flagprocessors)
         # prevent nesting of addgroup
         self._adding_group = None

         chunk_cache = self._loadindex()
         self._load_inner(chunk_cache)

         self._concurrencychecker = concurrencychecker

     @property
     def _generaldelta(self):
         """temporary compatibility proxy"""
         util.nouideprecwarn(
             b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
         )
         return self.delta_config.general_delta

     @property
     def _checkambig(self):
         """temporary compatibility proxy"""
         util.nouideprecwarn(
             b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
         )
         return self.data_config.check_ambig

     @property
     def _mmaplargeindex(self):
         """temporary compatibility proxy"""
         util.nouideprecwarn(
             b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
         )
         return self.data_config.mmap_large_index

     @property
     def _censorable(self):
         """temporary compatibility proxy"""
         util.nouideprecwarn(
             b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
         )
         return self.feature_config.censorable

     @property
     def _chunkcachesize(self):
         """temporary compatibility proxy"""
         util.nouideprecwarn(
             b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
         )
         return self.data_config.chunk_cache_size

     @property
     def _maxchainlen(self):
         """temporary compatibility proxy"""
         util.nouideprecwarn(
             b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
         )
         return self.delta_config.max_chain_len

     @property
     def _deltabothparents(self):
         """temporary compatibility proxy"""
         util.nouideprecwarn(
             b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
         )
         return self.delta_config.delta_both_parents

     @property
     def _candidate_group_chunk_size(self):
         """temporary compatibility proxy"""
         util.nouideprecwarn(
             b"use revlog.delta_config.candidate_group_chunk_size",
             b"6.6",
             stacklevel=2,
         )
         return self.delta_config.candidate_group_chunk_size

     @property
     def _debug_delta(self):
         """temporary compatibility proxy"""
         util.nouideprecwarn(
             b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
         )
         return self.delta_config.debug_delta

     @property
     def _compengine(self):
         """temporary compatibility proxy"""
         util.nouideprecwarn(
             b"use revlog.feature_config.compression_engine",
             b"6.6",
             stacklevel=2,
         )
         return self.feature_config.compression_engine

     @property
     def upperboundcomp(self):
         """temporary compatibility proxy"""
         util.nouideprecwarn(
             b"use revlog.delta_config.upper_bound_comp",
             b"6.6",
             stacklevel=2,
         )
         return self.delta_config.upper_bound_comp

     @property
     def _compengineopts(self):
         """temporary compatibility proxy"""
         util.nouideprecwarn(
             b"use revlog.feature_config.compression_engine_options",
             b"6.6",
             stacklevel=2,
         )
         return self.feature_config.compression_engine_options

     @property
     def _maxdeltachainspan(self):
         """temporary compatibility proxy"""
         util.nouideprecwarn(
             b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
         )
         return self.delta_config.max_deltachain_span

     @property
     def _withsparseread(self):
         """temporary compatibility proxy"""
         util.nouideprecwarn(
             b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
         )
         return self.data_config.with_sparse_read

     @property
     def _sparserevlog(self):
         """temporary compatibility proxy"""
         util.nouideprecwarn(
             b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
         )
         return self.delta_config.sparse_revlog

     @property
     def hassidedata(self):
         """temporary compatibility proxy"""
         util.nouideprecwarn(
             b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
         )
         return self.feature_config.has_side_data

     @property
     def _srdensitythreshold(self):
         """temporary compatibility proxy"""
         util.nouideprecwarn(
             b"use revlog.data_config.sr_density_threshold",
             b"6.6",
             stacklevel=2,
         )
         return self.data_config.sr_density_threshold

     @property
     def _srmingapsize(self):
         """temporary compatibility proxy"""
         util.nouideprecwarn(
             b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
         )
         return self.data_config.sr_min_gap_size

     @property
     def _compute_rank(self):
         """temporary compatibility proxy"""
         util.nouideprecwarn(
             b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
         )
         return self.feature_config.compute_rank

     @property
     def canonical_parent_order(self):
         """temporary compatibility proxy"""
         util.nouideprecwarn(
             b"use revlog.feature_config.canonical_parent_order",
             b"6.6",
             stacklevel=2,
         )
         return self.feature_config.canonical_parent_order

     @property
     def _lazydelta(self):
         """temporary compatibility proxy"""
         util.nouideprecwarn(
             b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
         )
         return self.delta_config.lazy_delta

     @property
     def _lazydeltabase(self):
         """temporary compatibility proxy"""
         util.nouideprecwarn(
             b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
         )
         return self.delta_config.lazy_delta_base

     def _init_opts(self):
         """process options (from above/config) to setup associated default revlog mode

         These values might be affected when actually reading on disk information.

         The relevant values are returned for use in _loadindex().

         * newversionflags:
             version header to use if we need to create a new revlog

         * mmapindexthreshold:
             minimal index size for start to use mmap

         * force_nodemap:
             force the usage of a "development" version of the nodemap code
         """
         opts = self.opener.options

         if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
             new_header = CHANGELOGV2
             compute_rank = opts.get(b'changelogv2.compute-rank', True)
             self.feature_config.compute_rank = compute_rank
         elif b'revlogv2' in opts:
             new_header = REVLOGV2
         elif b'revlogv1' in opts:
             new_header = REVLOGV1 | FLAG_INLINE_DATA
             if b'generaldelta' in opts:
                 new_header |= FLAG_GENERALDELTA
         elif b'revlogv0' in self.opener.options:
             new_header = REVLOGV0
         else:
             new_header = REVLOG_DEFAULT_VERSION

         mmapindexthreshold = None
         if self.data_config.mmap_large_index:
             mmapindexthreshold = self.data_config.mmap_index_threshold
         if self.feature_config.enable_ellipsis:
             self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

         # revlog v0 doesn't have flag processors
         for flag, processor in opts.get(b'flagprocessors', {}).items():
             flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

         chunk_cache_size = self.data_config.chunk_cache_size
         if chunk_cache_size <= 0:
             raise error.RevlogError(
                 _(b'revlog chunk cache size %r is not greater than 0')
                 % chunk_cache_size
             )
         elif chunk_cache_size & (chunk_cache_size - 1):
             raise error.RevlogError(
                 _(b'revlog chunk cache size %r is not a power of 2')
                 % chunk_cache_size
             )
         force_nodemap = opts.get(b'devel-force-nodemap', False)
         return new_header, mmapindexthreshold, force_nodemap

     def _get_data(self, filepath, mmap_threshold, size=None):
         """return a file content with or without mmap

         If the file is missing return the empty string"""
         try:
             with self.opener(filepath) as fp:
                 if mmap_threshold is not None:
                     file_size = self.opener.fstat(fp).st_size
                     if file_size >= mmap_threshold:
                         if size is not None:
952 # avoid potentiel mmap crash
962 # avoid potentiel mmap crash
953 size = min(file_size, size)
963 size = min(file_size, size)
954 # TODO: should .close() to release resources without
964 # TODO: should .close() to release resources without
955 # relying on Python GC
965 # relying on Python GC
956 if size is None:
966 if size is None:
957 return util.buffer(util.mmapread(fp))
967 return util.buffer(util.mmapread(fp))
958 else:
968 else:
959 return util.buffer(util.mmapread(fp, size))
969 return util.buffer(util.mmapread(fp, size))
960 if size is None:
970 if size is None:
961 return fp.read()
971 return fp.read()
962 else:
972 else:
963 return fp.read(size)
973 return fp.read(size)
964 except FileNotFoundError:
974 except FileNotFoundError:
965 return b''
975 return b''
966
976
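    # Illustrative sketch (assumed threshold, not part of the module): with
    # a 64 KiB threshold,
    #
    #   data = self._get_data(b'00changelog.i', 64 * 1024)
    #
    # maps a 1 MiB index through `util.mmapread` but plain-reads a 4 KiB
    # one. Capping `size` to `file_size` above avoids mapping past the end
    # of the file, which can crash on some platforms.
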
    def get_streams(self, max_linkrev, force_inline=False):
        """return a list of streams that represent this revlog

        This is used by stream-clone to do byte-for-byte copies of a repository.

        This streams data for all revisions that refer to a changelog revision up
        to `max_linkrev`.

        If `force_inline` is set, it enforces that the stream will represent an inline revlog.

        It returns a list of three-tuples:

        [
            (filename, bytes_stream, stream_size),
            …
        ]
        """
        n = len(self)
        index = self.index
        while n > 0:
            linkrev = index[n - 1][4]
            if linkrev < max_linkrev:
                break
            # note: this loop will rarely go through multiple iterations, since
            # it only traverses commits created during the current streaming
            # pull operation.
            #
            # If this becomes a problem, using a binary search should cap the
            # runtime of this.
            n = n - 1
        if n == 0:
            # no data to send
            return []
        index_size = n * index.entry_size
        data_size = self.end(n - 1)

        # XXX we might have been split (or stripped) since the object was
        # initialized. We need to close this race too, but that requires a
        # way to pre-open the files we feed to the revlog and to never close
        # them before we are done streaming.

        if self._inline:

            def get_stream():
                with self.opener(self._indexfile, mode=b"r") as fp:
                    yield None
                    size = index_size + data_size
                    if size <= 65536:
                        yield fp.read(size)
                    else:
                        yield from util.filechunkiter(fp, limit=size)

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        elif force_inline:

            def get_stream():
                with self.reading():
                    yield None

                    for rev in range(n):
                        idx = self.index.entry_binary(rev)
                        if rev == 0 and self._docket is None:
                            # re-inject the inline flag
                            header = self._format_flags
                            header |= self._format_version
                            header |= FLAG_INLINE_DATA
                            header = self.index.pack_header(header)
                            idx = header + idx
                        yield idx
                        yield self._inner.get_segment_for_revs(rev, rev)[1]

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        else:

            def get_index_stream():
                with self.opener(self._indexfile, mode=b"r") as fp:
                    yield None
                    if index_size <= 65536:
                        yield fp.read(index_size)
                    else:
                        yield from util.filechunkiter(fp, limit=index_size)

            def get_data_stream():
                with self._datafp() as fp:
                    yield None
                    if data_size <= 65536:
                        yield fp.read(data_size)
                    else:
                        yield from util.filechunkiter(fp, limit=data_size)

            index_stream = get_index_stream()
            next(index_stream)
            data_stream = get_data_stream()
            next(data_stream)
            return [
                (self._datafile, data_stream, data_size),
                (self._indexfile, index_stream, index_size),
            ]

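    # Hedged usage sketch (hypothetical caller, not part of this module):
    #
    #   for name, stream, size in rl.get_streams(max_linkrev=tip_linkrev):
    #       for chunk in stream:        # streams are already primed, so
    #           out.write(chunk)        # iteration yields raw byte chunks
    #
    # Each stream yields exactly `size` bytes for the file `name`.
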
    def _loadindex(self, docket=None):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        elif self._try_split and self.opener.exists(self._split_index_file):
            entry_point = self._split_index_file
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

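            # Worked example of the split above (constants from
            # revlogutils.constants; the value itself is illustrative): a v1
            # inline generaldelta revlog starts with header 0x00030001, so
            #   format_flags   = header & ~0xFFFF -> 0x00030000
            #                    (FLAG_INLINE_DATA | FLAG_GENERALDELTA)
            #   format_version = header & 0xFFFF  -> 0x0001 (REVLOGV1)
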
            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self.delta_config.general_delta = features[b'generaldelta'](
                self._format_flags
            )
            self.feature_config.has_side_data = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self.delta_config.general_delta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self.delta_config.general_delta:
            self.delta_config.sparse_revlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and hasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

        return chunkcache

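    # Rough map of the parser dispatch above (a summary of existing
    # behaviour, not new logic):
    #
    #   REVLOGV0      -> revlogv0.parse_index_v0
    #   REVLOGV2      -> parse_index_v2
    #   CHANGELOGV2   -> parse_index_cl_v2
    #   devel nodemap -> parse_index_v1_nodemap
    #   rust index    -> parse_index_v1_mixed
    #   default       -> parse_index_v1
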
    def _load_inner(self, chunk_cache):
        self._inner = _InnerRevlog(
            opener=self.opener,
            index=self.index,
            index_file=self._indexfile,
            data_file=self._datafile,
            sidedata_file=self._sidedatafile,
            inline=self._inline,
            data_config=self.data_config,
            chunk_cache=chunk_cache,
        )

    def get_revlog(self):
        """simple accessor to mirror the API of objects that are not really revlogs"""
        return self

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        if self.revlog_kind == KIND_FILELOG:
            # Reference the file without the "data/" prefix, so it is familiar
            # to the user.
            return self.target[1]
        else:
            return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(
                    self.feature_config.compression_engine_options
                )
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

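    # For context, a hedged note on the header byte: `forrevlogheader`
    # dispatches on the first byte of a stored chunk, e.g. b'x' for
    # zlib-compressed data and b'u' for data stored uncompressed. Engines
    # registered with other headers are looked up the same way.
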
    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self.feature_config.compression_engine]
        return engine.revlogcompressor(
            self.feature_config.compression_engine_options
        )

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def _candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        """update the on-disk cache

        If a transaction is passed, the update may be delayed to transaction
        commit."""
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        """Clear in-memory caches"""
        self._revisioncache = None
        self._chainbasecache.clear()
        self._inner._segmentfile.clear_cache()
        self._inner._segmentfile_sidedata.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket, we
        # end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and hasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        """return the revision number associated with a <nodeid>"""
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
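    # Worked example of that packing (illustrative values): an entry
    # starting at byte offset 0x1234 with flags 0x0002 stores
    #   index[rev][0] == (0x1234 << 16) | 0x0002 == 0x12340002
    # so `start` recovers 0x1234 via `>> 16` and `flags` recovers 0x0002
    # via `& 0xFFFF`.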
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset. (instead of previous-offset +
        # previous-size)
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.feature_config.has_side_data:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank

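    # Tiny worked example (illustrative graph): in a linear history
    # 0 <- 1 <- 2, ancestors(2) == {0, 1, 2}, so rank(2) == 3. For a merge
    # whose parents have disjoint ancestor sets of sizes 3 and 2, the merge
    # revision has rank 3 + 2 + 1 == 6.
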
    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        if self.feature_config.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

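    # Note on the swap above (a summary of existing behaviour): with
    # canonical parent order a stored pair (nullrev, p2) is returned as
    # (p2, nullrev), so a null parent never comes first, e.g.
    #   stored (-1, 5) -> returned (5, -1)
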
    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if self.feature_config.canonical_parent_order and d[5] == nullrev:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self.delta_config.general_delta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        generaldelta = self.delta_config.general_delta
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

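    # Sketch of the walk above (illustrative base pointers): with
    # generaldelta on and index bases 7 -> 5 -> 5 (rev 5 is a full
    # snapshot, so its base is itself),
    #   _deltachain(7)            -> ([5, 7], False)
    #   _deltachain(7, stoprev=5) -> ([7], True)
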
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

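    # Hedged usage sketch (hypothetical node lists C and H):
    #
    #   has, missing = rl.findcommonmissing(common=C, heads=H)
    #
    # `missing` then matches the revset `(::H) - (::C)` sorted by revision
    # number, while `has` lazily answers membership tests for `::C`.
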
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
1878 heads[n] = True
1888 heads[n] = True
1879 elif ancestors is None:
1889 elif ancestors is None:
1880 # Otherwise, we're trying to discover the heads.
1890 # Otherwise, we're trying to discover the heads.
1881 # Assume this is a head because if it isn't, the next step
1891 # Assume this is a head because if it isn't, the next step
1882 # will eventually remove it.
1892 # will eventually remove it.
1883 heads[n] = True
1893 heads[n] = True
1884 # But, obviously its parents aren't.
1894 # But, obviously its parents aren't.
1885 for p in self.parents(n):
1895 for p in self.parents(n):
1886 heads.pop(p, None)
1896 heads.pop(p, None)
1887 heads = [head for head, flag in heads.items() if flag]
1897 heads = [head for head, flag in heads.items() if flag]
1888 roots = list(roots)
1898 roots = list(roots)
1889 assert orderedout
1899 assert orderedout
1890 assert roots
1900 assert roots
1891 assert heads
1901 assert heads
1892 return (orderedout, roots, heads)
1902 return (orderedout, roots, heads)
1893
1903
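    # Editor's illustration (not upstream code): on a linear history
    # n0 <- n1 <- n2, the block above (the tail of ``nodesbetween``) would
    # return all three nodes in topological order, with n0 the only
    # surviving root and n2 the only reachable head; `rl`, `n0`, `n2` are
    # hypothetical names:
    #
    #   orderedout, roots, heads = rl.nodesbetween([n0], [n2])
    #   # orderedout == [n0, n1, n2]; roots == [n0]; heads == [n2]
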
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

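    # Editorial note on `_headrevs` above: the array has `count + 1` slots
    # so that writes for nullrev (-1) parents harmlessly land in the extra
    # last slot instead of clobbering a real revision. A tiny sketch with
    # hypothetical parent data:
    #
    #   parents = [(-1, -1), (0, -1), (0, -1)]  # revs 1 and 2 fork off 0
    #   ishead = [1, 1, 1, 0]
    #   for p1, p2 in parents:
    #       ishead[p1] = ishead[p2] = 0
    #   # -> [r for r, v in enumerate(ishead[:3]) if v] == [1, 2]
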
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

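    # Hedged usage sketch (hypothetical names): `rl.heads()` lists every
    # childless node, `rl.heads(start=n)` restricts the answer to heads
    # descending from `n`, and passing `stop` nodes prunes the search by
    # treating those revisions as childless.
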
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

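    # Editorial sketch of the reduction above: because a parent always has
    # a smaller revision number than its child, `a > b` can never be an
    # ancestor relation. Otherwise the question "is rev 2 an ancestor of
    # rev 5?" becomes (hypothetical values):
    #
    #   bool(self.reachableroots(2, heads=[5], roots=[2], includepath=False))
    #
    # i.e. non-empty exactly when rev 2 is reachable from rev 5 by
    # following parent links.
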
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

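    # Editorial note: with criss-cross merges there can be several equally
    # good common ancestors; taking min() over the binary node ids above is
    # an arbitrary but stable tie-break, so every caller (and platform)
    # picks the same winner.
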
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass

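    # Hedged summary of the resolution order `_match` implements above:
    #   1. int                       -> revision number
    #   2. nodelen-sized bytes       -> binary nodeid
    #   3. b"123" / b"-1"            -> str(rev); negatives count from tip
    #   4. 2*nodelen-sized hex bytes -> full hex nodeid
    # Anything that matches none of these falls through and returns None.
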
    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if not (id[-1] in hexdigits):
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

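    # Editorial note on the wdir corner case above: the virtual working
    # directory id is all 0xff bytes, so a prefix like b'ffff' may denote
    # either a real node or wdir; a radix-tree hit under such a prefix is
    # therefore reported as ambiguous instead of silently resolved.
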
    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

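    # Hedged example (hypothetical hashes): if exactly one node starts
    # with b'd4a1', `rl.shortest(node)` grows the prefix one hex digit at
    # a time and returns b'd4a1' as soon as it stops being ambiguous,
    # while `rl.shortest(node, minlength=6)` returns the first valid
    # prefix of at least six digits.
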
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _chunk(self, rev):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._inner.get_segment_for_revs(rev, rev)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self.data_config.with_sparse_read:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._inner.get_segment_for_revs(
                    firstrev,
                    lastrev,
                )
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

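    # Editorial note on the sparse-read path above: slicechunk() groups
    # the requested revs into spans that are dense enough on disk, so each
    # group costs one contiguous read (bounded by `targetsize`) instead of
    # many small seeks; individual chunks are then sliced out of that
    # buffer and decompressed according to their per-entry compression
    # mode.
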
    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self.delta_config.general_delta:
            return base
        else:
            return rev - 1

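    # Hedged sketch of the layouts handled above, given `base`, the
    # delta-base field of the index entry for `rev`:
    #
    #   base == rev          -> full snapshot, delta parent is nullrev
    #   general-delta revlog -> delta parent is `base` (any older rev)
    #   legacy layout        -> deltas implicitly chain to `rev - 1`
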
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self.delta_config.sparse_revlog:
            return self.deltaparent(rev) == nullrev
        elif hasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        while self.length(p1) == 0:
            b = self.deltaparent(p1)
            if b == p1:
                break
            p1 = b
        p2 = entry[6]
        while self.length(p2) == 0:
            b = self.deltaparent(p2)
            if b == p2:
                break
            p2 = b
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

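    # Editorial illustration: for a delta chain
    #   full(0) <- snapshot(3) <- snapshot(7) <- delta(9)
    # issnapshot() holds for revs 0, 3 and 7, and snapshotdepth(7) == 2
    # since its own chain [0, 3, 7] contains two snapshots before it.
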
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def revision(self, nodeorrev):
        """return an uncompressed revision of a given node or revision
        number.
        """
        return self._revisiondata(nodeorrev)

    def sidedata(self, nodeorrev):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more
        advanced mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev)

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

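    # Hedged summary of the read path above: resolve the node/rev pair,
    # fast-path nullid to b"", fetch the (possibly cached) rawtext, then
    # either hand it back as-is (raw=True, or no flags set) or run the
    # read-side flag processors and hash-check the result before
    # refilling the revision cache.
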
    def _rawtext(self, node, rev):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

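    # Minimal sketch of the reconstruction performed above (editorial; the
    # heavy lifting lives in mdiff.patches):
    #
    #   chain, stopped = self._deltachain(rev, stoprev=cachedrev)
    #   bins = self._chunks(chain)          # base text, then deltas
    #   rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
    #
    # When the chain stops at a cached revision instead, the cached
    # rawtext plays the role of the on-disk base and every chunk in
    # `bins` is applied as a delta on top of it.
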
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
            filename = self._sidedatafile
            end = self._docket.sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._inner._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev):
        """return the uncompressed raw data of a given node or revision number."""
        return self._revisiondata(nodeorrev, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self.feature_config.censorable and storageutil.iscensoredtext(
                text
            ):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    @property
    def _split_index_file(self):
        """the path at which to expect the index of an ongoing splitting operation

        The file will only exist if a splitting operation is in progress, but
        it is always expected at the same location."""
        parts = self.radix.split(b'/')
        if len(parts) > 1:
            # add a '-s' suffix to the ``data/`` or ``meta/`` base directory
            head = parts[0] + b'-s'
            mids = parts[1:-1]
            tail = parts[-1] + b'.i'
            pieces = [head] + mids + [tail]
            return b'/'.join(pieces)
        else:
            # the revlog is stored at the root of the store (changelog or
            # manifest), no risk of collision.
            return self.radix + b'.i.s'

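    # Editorial example of the naming scheme above (hypothetical paths):
    #
    #   radix b'data/foo.py' -> b'data-s/foo.py.i'  (store subdirectory)
    #   radix b'00changelog' -> b'00changelog.i.s'  (store root)
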
    def _enforceinlinesize(self, tr, side_write=True):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        if self._docket is not None:
            msg = b"inline revlog should not have a docket"
            raise error.ProgrammingError(msg)

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        if troffset:
            tr.addbackup(self._indexfile, for_offset=True)
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._inner._writinghandles is not None:
            existing_handles = True
            fp = self._inner._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._inner._writinghandles = None
            self._inner._segmentfile.writing_handle = None
            # No need to deal with sidedata writing handle as it is only
            # relevant with revlog-v2 which is never inline, not reaching
            # this code
        if side_write:
            old_index_file_path = self._indexfile
            new_index_file_path = self._split_index_file
            opener = self.opener
            weak_self = weakref.ref(self)

            # the "split" index replaces the real index when the transaction
            # is finalized
            def finalize_callback(tr):
                opener.rename(
                    new_index_file_path,
                    old_index_file_path,
                    checkambig=True,
                )
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path
                    maybe_self._inner.index_file = maybe_self._indexfile

            def abort_callback(tr):
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path
                    maybe_self._inner.index_file = old_index_file_path

            tr.registertmp(new_index_file_path)
            if self.target[1] is not None:
                callback_id = b'000-revlog-split-%d-%s' % self.target
            else:
                callback_id = b'000-revlog-split-%d' % self.target[0]
            tr.addfinalize(callback_id, finalize_callback)
            tr.addabort(callback_id, abort_callback)

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self.reading():
                for r in self:
                    new_dfh.write(self._inner.get_segment_for_revs(r, r)[1])
                new_dfh.flush()

            if side_write:
                self._indexfile = new_index_file_path
                self._inner.index_file = self._indexfile
            with self._inner._InnerRevlog__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                self._inner.inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)

                # If we don't use side-write, the temp file replaces the
                # real index when we exit the context manager

            nodemaputil.setup_persistent_nodemap(tr, self)
            self._inner._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self._datafile,
                self.data_config.chunk_cache_size,
            )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                index_end = None
                ifh = self._inner._InnerRevlog__index_write_fp(
                    index_end=index_end
                )
                self._inner._writinghandles = (ifh, new_dfh, None)
                self._inner._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with sidedata writing handle as it is only
                # relevant with revlog-v2 which is never inline, not reaching
                # this code
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        with self._inner.reading():
            yield

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._inner.is_writing:
            yield
        else:
            data_end = None
            sidedata_end = None
            if self._docket is not None:
                data_end = self._docket.data_end
                sidedata_end = self._docket.sidedata_end
            with self._inner.writing(
                transaction,
                data_end=data_end,
                sidedata_end=sidedata_end,
            ):
                yield
                if self._docket is not None:
                    self._write_docket(transaction)

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic.

        We could also imagine using the same transaction logic for all revlogs
        since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.feature_config.has_side_data:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
2798 sidedata=sidedata,
2789 )
2799 )
2790
2800
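    # A minimal call sketch (illustrative; the transaction, linkrev and
    # parent nodes come from the surrounding repository code):
    #
    #   rev = rl.addrevision(b'new file content', tr, linkrev, p1node, p2node)
    #
    # If the computed node already exists in the index, the existing revision
    # number is returned and nothing is written.
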
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

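    # A sketch of the header convention compress() implements with the
    # default zlib engine (illustrative values): the first element of the
    # returned pair is prepended to the stored chunk so that decompress()
    # can route on the first byte.
    #
    #   compress(b'')             -> (b'', b'')       # empty chunk, as-is
    #   compress(<compressible>)  -> (b'', b'x...')   # zlib adds 'x' header
    #   compress(b'\0...')        -> (b'', b'\0...')  # NUL byte, unambiguous
    #   compress(b'short')        -> (b'u', b'short') # stored uncompressed
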
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

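    # Routing sketch for the first byte read above (default engines;
    # illustrative only):
    #
    #   b'x'  -> zlib-compressed chunk (zlib streams begin with 0x78 == 'x')
    #   b'\0' -> raw chunk whose own first byte is NUL, returned verbatim
    #   b'u'  -> uncompressed chunk; the leading 'u' marker is stripped
    #   other -> resolved through the registered compression engines
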
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._inner._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._inner._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self.delta_config.debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        if cachedelta is not None and len(cachedelta) == 2:
            # If the cached delta has no information about how it should be
            # reused, add the default reuse instruction according to the
            # revlog's configuration.
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse = DELTA_BASE_REUSE_NO
            cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.feature_config.has_side_data:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

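        # The "rank" of a revision is the size of the set formed by the
        # revision and all its ancestors. Illustration (hypothetical graph):
        # a root has rank 1 and its child rank 2; a merge of parents with
        # ranks 3 and 4 sharing 2 common ancestors has rank 3 + 4 - 2 + 1 = 6.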
        rank = RANK_UNKNOWN
        if self.feature_config.compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._inner._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._inner._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._inner._writinghandles[0]
            wh2 = self._inner._writinghandles[1]
            wh3 = self._inner._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

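    # A standalone sketch of the a+ pitfall worked around above (illustrative
    # only, not revlog API): on some platforms a read moves the shared file
    # position, so an explicit seek is needed before appending.
    #
    #   with open('log.bin', 'a+b') as fh:
    #       fh.seek(0)
    #       fh.read()                # repositions the handle
    #       fh.seek(0, os.SEEK_END)  # defensive seek before the append
    #       fh.write(b'entry')
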
    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first delta
        is against its parent, which should be in our log; the rest are
        against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self.delta_config.debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

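    # Sketch of the tuples addgroup() consumes (field order as unpacked in
    # the loop above; names illustrative):
    #
    #   deltas = [
    #       (node, p1, p2, linknode, deltabase, delta, flags, sidedata),
    #       ...
    #   ]
    #   rl.addgroup(deltas, linkmapper, tr)
    #
    # `linkmapper` translates each linknode to a local linkrev; the return
    # value is False only when the incoming group contained no revisions.
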
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self.feature_config.censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self.feature_config.censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment.
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._inner._segmentfile.clear_cache()
        self._inner._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]

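    # A minimal strip sketch (illustrative; `rl` and `tr` come from the
    # surrounding repository code):
    #
    #   rev, broken = rl.getstrippoint(minlink)
    #   # `broken` holds revs whose linkrevs the strip will invalidate; the
    #   # caller is expected to save them (e.g. to a bundle) before:
    #   rl.strip(minlink, tr)
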
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

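    # Interpreting the result (illustrative):
    #
    #   dd, di = rl.checksize()
    #   # (0, 0)  -> sizes are consistent
    #   # dd > 0  -> the data file has bytes past the last indexed entry
    #   # di != 0 -> the index length is not a whole number of entries (or,
    #   #           for inline revlogs, does not match index-plus-data size)
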
    def files(self):
        """return list of files that compose this revlog"""
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self.delta_config.general_delta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self._candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. When unset, the destination revlog's current setting is
        used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        old_delta_config = destrevlog.delta_config
        destrevlog.delta_config = destrevlog.delta_config.copy()

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog.delta_config.lazy_delta_base = True
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = False

            delta_both_parents = (
                forcedeltabothparents or old_delta_config.delta_both_parents
            )
            destrevlog.delta_config.delta_both_parents = delta_both_parents

            with self.reading(), destrevlog._writing(tr):
                self._clone(
                    tr,
                    destrevlog,
                    addrevisioncb,
                    deltareuse,
                    forcedeltabothparents,
                    sidedata_helpers,
                )

        finally:
            destrevlog.delta_config = old_delta_config

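    # A clone usage sketch (illustrative; `src`, `dst` and `tr` are set up
    # by the caller), forcing every delta to be recomputed:
    #
    #   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
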
3584 def _clone(
3594 def _clone(
3585 self,
3595 self,
3586 tr,
3596 tr,
3587 destrevlog,
3597 destrevlog,
3588 addrevisioncb,
3598 addrevisioncb,
3589 deltareuse,
3599 deltareuse,
3590 forcedeltabothparents,
3600 forcedeltabothparents,
3591 sidedata_helpers,
3601 sidedata_helpers,
3592 ):
3602 ):
3593 """perform the core duty of `revlog.clone` after parameter processing"""
3603 """perform the core duty of `revlog.clone` after parameter processing"""
3594 write_debug = None
3604 write_debug = None
3595 if self.delta_config.debug_delta:
3605 if self.delta_config.debug_delta:
3596 write_debug = tr._report
3606 write_debug = tr._report
3597 deltacomputer = deltautil.deltacomputer(
3607 deltacomputer = deltautil.deltacomputer(
3598 destrevlog,
3608 destrevlog,
3599 write_debug=write_debug,
3609 write_debug=write_debug,
3600 )
3610 )
3601 index = self.index
3611 index = self.index
3602 for rev in self:
3612 for rev in self:
3603 entry = index[rev]
3613 entry = index[rev]
3604
3614
3605 # Some classes override linkrev to take filtered revs into
3615 # Some classes override linkrev to take filtered revs into
3606 # account. Use raw entry from index.
3616 # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog.delta_config.lazy_delta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    try:
                        rawtext = self._revisiondata(rev)
                    except error.CensoredNodeError as censored:
                        assert flags & REVIDX_ISCENSORED
                        rawtext = censored.tombstone
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

            if addrevisioncb:
                addrevisioncb(self, rev, node)
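
    # Note (summary, inferred from the branches above): the
    # DELTAREUSEFULLADD path re-adds the fully resolved text through the
    # public addrevision() API, while the default path goes through
    # _addrevision() and, when the destination allows lazy deltas, hands
    # the source delta over verbatim as `cachedelta` to avoid recomputing
    # it.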

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)
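
    # Hedged usage sketch (illustrative, not from this changeset): censoring
    # rewrites storage under a transaction, so a caller would look roughly
    # like:
    #
    #     with repo.transaction(b'censor') as tr:
    #         filelog.censorrevision(tr, censored_node, tombstone=b'CENSORED')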

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta:   file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #   ext:    content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text" mentioned below
            # is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).
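            #
            # Illustrative example (an assumption about exact shape,
            # matching the filelog metadata framing described above): a
            # renamed file's rawtext embeds its metadata between two
            # b'\1\n' markers, e.g.
            #
            #     rawtext = (
            #         b'\x01\n'
            #         b'copy: old-name\n'
            #         b'copyrev: <40 hex digits>\n'
            #         b'\x01\n'
            #         b'actual file content'
            #     )
            #
            # so LM is the length of everything up to and including the
            # second b'\1\n' marker, and len(read()) == len(rawtext) - LM.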

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)
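
    # Hedged sketch (caller names assumed, not from this changeset):
    # verify-style callers seed the `state` dict consumed above and then
    # iterate over the yielded problems:
    #
    #     state = {
    #         b'expectedversion': REVLOGV1,
    #         b'erroroncensored': False,
    #         b'skipflags': 0,
    #     }
    #     for problem in rlog.verifyintegrity(state):
    #         ui.warn(problem.error or problem.warning)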

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d
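
    # Illustrative usage (a sketch, not part of this change): callers opt in
    # to each piece of information, so cheap queries stay cheap:
    #
    #     info = rlog.storageinfo(revisionscount=True, storedsize=True)
    #     # -> {b'revisionscount': ..., b'storedsize': ...}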

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.feature_config.has_side_data:
            return
        # revlog formats with sidedata support do not support inline data
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._inner._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.feature_config.has_side_data:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
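                # Note (inferred from the branch above): the compression
                # mode records how readers should interpret the stored
                # sidedata -- COMP_MODE_PLAIN when the bytes are kept
                # uncompressed, COMP_MODE_DEFAULT when the compressed
                # payload starts with the docket's default compression
                # header, and COMP_MODE_INLINE when the chunk carries its
                # own compression header, as in classic revlogs.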
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
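
    # Hedged sketch (illustrative names, not from this changeset): sidedata
    # rewriting is driven over a revision range under a transaction, with
    # the same `helpers` value consumed above (helpers[1] and helpers[2]
    # being the "generate" and "remove" parts):
    #
    #     with repo.transaction(b'rewrite-sidedata') as tr:
    #         rlog.rewrite_sidedata(tr, helpers, 0, len(rlog) - 1)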