inline-changelog: fix pending transaction visibility when splitting...
marmoute
r52531:1721d983 stable
@@ -1,4081 +1,4088 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import functools
import io
import os
import struct
import weakref
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanket usage of all the names to prevent pyflakes complaints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072


# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider the "pure" Python implementation to be "fast" because
# people using pure Python don't really have performance considerations
# (and a wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if hasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_rust(data, inline, default_header):
    cache = (0, data) if inline else None
    return rustrevlog.Index(data, default_header), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'


class _Config:
    def copy(self):
        return self.__class__(**self.__dict__)


@attr.s()
class FeatureConfig(_Config):
    """Hold configuration values about the available revlog features"""

    # the default compression engine
    compression_engine = attr.ib(default=b'zlib')
    # compression engines options
    compression_engine_options = attr.ib(default=attr.Factory(dict))

    # can we use censor on this revlog
    censorable = attr.ib(default=False)
    # does this revlog use the "side data" feature
    has_side_data = attr.ib(default=False)
    # might remove rank configuration once the computation has no impact
    compute_rank = attr.ib(default=False)
    # parent order is supposed to be semantically irrelevant, so we
    # normally resort parents to ensure that the first parent is non-null,
    # if there is a non-null parent at all.
    # filelog abuses the parent order as flag to mark some instances of
    # meta-encoded files, so allow it to disable this behavior.
    canonical_parent_order = attr.ib(default=False)
    # can ellipsis commit be used
    enable_ellipsis = attr.ib(default=False)

    def copy(self):
        new = super().copy()
        new.compression_engine_options = self.compression_engine_options.copy()
        return new


@attr.s()
class DataConfig(_Config):
    """Hold configuration values about how the revlog data are read"""

    # should we try to open the "pending" version of the revlog
    try_pending = attr.ib(default=False)
    # should we try to open the "split" version of the revlog
    try_split = attr.ib(default=False)
    # When True, indexfile should be opened with checkambig=True at writing,
    # to avoid file stat ambiguity.
    check_ambig = attr.ib(default=False)

    # If true, use mmap instead of reading to deal with large index
    mmap_large_index = attr.ib(default=False)
    # how much data is considered large
    mmap_index_threshold = attr.ib(default=None)
    # How much data to read and cache into the raw revlog data cache.
    chunk_cache_size = attr.ib(default=65536)

    # The size of the uncompressed cache compared to the largest revision seen.
    uncompressed_cache_factor = attr.ib(default=None)

    # The number of chunks cached
    uncompressed_cache_count = attr.ib(default=None)

    # Allow sparse reading of the revlog data
    with_sparse_read = attr.ib(default=False)
    # minimal density of a sparse read chunk
    sr_density_threshold = attr.ib(default=0.50)
    # minimal size of data we skip when performing sparse read
    sr_min_gap_size = attr.ib(default=262144)

    # are deltas encoded against arbitrary bases?
    generaldelta = attr.ib(default=False)


@attr.s()
class DeltaConfig(_Config):
    """Hold configuration values about how new deltas are computed

    Some attributes are duplicated from DataConfig to help keep each object
    self-contained.
    """

    # can deltas be encoded against arbitrary bases?
    general_delta = attr.ib(default=False)
    # Allow sparse writing of the revlog data
    sparse_revlog = attr.ib(default=False)
    # maximum length of a delta chain
    max_chain_len = attr.ib(default=None)
    # Maximum distance between delta chain base start and end
    max_deltachain_span = attr.ib(default=-1)
    # If `upper_bound_comp` is not None, this is the expected maximal gain from
    # compression for the data content.
    upper_bound_comp = attr.ib(default=None)
    # Should we try a delta against both parents?
    delta_both_parents = attr.ib(default=True)
    # Test delta base candidate groups by chunks of this maximal size.
    candidate_group_chunk_size = attr.ib(default=0)
    # Should we display debug information about delta computation?
    debug_delta = attr.ib(default=False)
    # trust incoming deltas by default
    lazy_delta = attr.ib(default=True)
    # trust the base of incoming deltas by default
    lazy_delta_base = attr.ib(default=False)


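# Illustrative sketch (not upstream code): because ``_Config.copy`` rebuilds
# the object from its attribute dict, tweaking a copy leaves the original
# untouched:
#
#   base = DeltaConfig(general_delta=True, sparse_revlog=True)
#   variant = base.copy()
#   variant.max_chain_len = 1000  # ``base.max_chain_len`` is still None

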
class _InnerRevlog:
    """An inner layer of the revlog object

    That layer exists to be able to delegate some operations to Rust; its
    boundaries are arbitrary and based on what we can delegate to Rust.
    """

    def __init__(
        self,
        opener,
        index,
        index_file,
        data_file,
        sidedata_file,
        inline,
        data_config,
        delta_config,
        feature_config,
        chunk_cache,
        default_compression_header,
    ):
        self.opener = opener
        self.index = index

        self.index_file = index_file
        self.data_file = data_file
        self.sidedata_file = sidedata_file
        self.inline = inline
        self.data_config = data_config
        self.delta_config = delta_config
        self.feature_config = feature_config

        # used during diverted write.
        self._orig_index_file = None

        self._default_compression_header = default_compression_header

        # index

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self.index_file if self.inline else self.data_file),
            self.data_config.chunk_cache_size,
            chunk_cache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self.sidedata_file,
            self.data_config.chunk_cache_size,
        )

        # revlog header -> revlog compressor
        self._decompressors = {}
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None

        # cache some uncompressed chunks
        # rev → uncompressed_chunk
        #
        # the max cost is dynamically updated to be proportional to the
        # size of revisions we actually encounter.
        self._uncompressed_chunk_cache = None
        if self.data_config.uncompressed_cache_factor is not None:
            self._uncompressed_chunk_cache = util.lrucachedict(
                self.data_config.uncompressed_cache_count,
                maxcost=65536,  # some arbitrary initial value
            )

        self._delay_buffer = None

    def __len__(self):
        return len(self.index)

    def clear_cache(self):
        assert not self.is_delaying
        self._revisioncache = None
        if self._uncompressed_chunk_cache is not None:
            self._uncompressed_chunk_cache.clear()
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

    @property
    def canonical_index_file(self):
        if self._orig_index_file is not None:
            return self._orig_index_file
        return self.index_file

    @property
    def is_delaying(self):
        """is the revlog currently delaying the visibility of written data?

        The delaying mechanism can be either in-memory or written on disk in a
        side-file."""
        return (self._delay_buffer is not None) or (
            self._orig_index_file is not None
        )

    # Derived from index values.

    def start(self, rev):
        """the offset of the data chunk for this revision"""
        return int(self.index[rev][0] >> 16)

    def length(self, rev):
        """the length of the data chunk for this revision"""
        return self.index[rev][1]

    def end(self, rev):
        """the end of the data chunk for this revision"""
        return self.start(rev) + self.length(rev)

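    # Illustrative note (not upstream commentary): ``index[rev][0]`` packs
    # ``offset << 16 | flags`` into a single integer, which is why ``start``
    # shifts right by 16. Assuming an entry stored at byte offset 1234 with
    # no flags, ``index[rev][0] == 1234 << 16`` and ``start(rev)`` recovers
    # 1234; ``end(rev)`` is then simply ``start(rev) + length(rev)``.
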
    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self.delta_config.general_delta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self.delta_config.sparse_revlog:
            return self.deltaparent(rev) == nullrev
        elif hasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        while self.length(p1) == 0:
            b = self.deltaparent(p1)
            if b == p1:
                break
            p1 = b
        p2 = entry[6]
        while self.length(p2) == 0:
            b = self.deltaparent(p2)
            if b == p2:
                break
            p2 = b
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        generaldelta = self.delta_config.general_delta
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

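    # Worked example (illustrative, not upstream commentary): suppose rev 7
    # is stored as a delta against rev 4, and rev 4 as a delta against the
    # full snapshot rev 2. Then ``_deltachain(7)`` returns
    # ``([2, 4, 7], False)``, while ``_deltachain(7, stoprev=4)`` returns
    # ``([7], True)``: the chain is cut before ``stoprev`` and ``stopped``
    # reports that the cut happened.
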
    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self.feature_config.compression_engine]
        return engine.revlogcompressor(
            self.feature_config.compression_engine_options
        )

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._default_compression_header is None:
            return None
        t = self._default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(
                    self.feature_config.compression_engine_options
                )
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

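    # Illustrative sketch (not upstream code) of how the header byte from
    # ``compress`` pairs up with ``decompress``, assuming ``inner`` is an
    # ``_InnerRevlog`` instance:
    #
    #   header, packed = inner.compress(b'some revision data')
    #   stored = header + packed        # what actually lands in the revlog
    #   assert inner.decompress(stored) == b'some revision data'
    #
    # ``header`` is b'u' for data stored uncompressed, and b'' when the
    # compressor already embedded its own marker (e.g. b'x' for zlib).
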
    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        if len(self.index) == 0:
            yield  # nothing to be read
        elif self._delay_buffer is not None and self.inline:
            msg = "revlog with delayed write should not be inline"
            raise error.ProgrammingError(msg)
        else:
            with self._segmentfile.reading():
                with self._segmentfile_sidedata.reading():
                    yield

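    # Typical (illustrative) usage, assuming ``inner`` is an ``_InnerRevlog``
    # instance: keep the underlying files open across a batch of reads
    # instead of re-opening them once per revision.
    #
    #   with inner.reading():
    #       for rev in revs:
    #           offset, data = inner.get_segment_for_revs(rev, rev)
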
    @property
    def is_writing(self):
        """True if a writing context is open"""
        return self._writinghandles is not None

    @property
    def is_open(self):
        """True if any file handle is being held

        Used for assert and debug in the python code"""
        return self._segmentfile.is_open or self._segmentfile_sidedata.is_open

    @contextlib.contextmanager
    def writing(self, transaction, data_end=None, sidedata_end=None):
        """Open the revlog files for writing

        Adding content to a revlog should be done within such a context.
        """
        if self.is_writing:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self.index)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self.inline:
                    try:
                        dfh = self.opener(self.data_file, mode=b"r+")
                        if data_end is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self.opener(self.data_file, mode=b"w+")
                    transaction.add(self.data_file, dsize)
                if self.sidedata_file is not None:
                    assert sidedata_end is not None
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self.sidedata_file, mode=b"r+")
                        sdfh.seek(sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self.sidedata_file, mode=b"w+")
                    transaction.add(self.sidedata_file, sidedata_end)

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self.inline:
                    transaction.add(self.index_file, dsize + isize)
                else:
                    transaction.add(self.index_file, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self.inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing references
                # to potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

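    # Typical (illustrative) usage, assuming ``tr`` is an open transaction
    # and ``inner`` is an ``_InnerRevlog`` instance:
    #
    #   with inner.writing(tr):
    #       ...  # append revisions; index/data/sidedata handles stay open
    #
    # Re-entering is safe: a nested ``writing`` block sees ``is_writing`` is
    # already true and simply yields without reopening anything.
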
    def __index_write_fp(self, index_end=None):
        """internal method to open the index file for writing

        You should not use this directly; use `_writing` instead.
        """
        try:
            if self._delay_buffer is None:
                f = self.opener(
                    self.index_file,
                    mode=b"r+",
                    checkambig=self.data_config.check_ambig,
                )
            else:
                # check_ambig affects the way we open the file for writing;
                # however, here we do not actually open a file for writing,
                # since writes will be appended to a delay_buffer. So
                # check_ambig is not meaningful and is unneeded here.
                f = randomaccessfile.appender(
                    self.opener, self.index_file, b"r+", self._delay_buffer
                )
            if index_end is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(index_end, os.SEEK_SET)
            return f
        except FileNotFoundError:
            if self._delay_buffer is None:
                return self.opener(
                    self.index_file,
                    mode=b"w+",
                    checkambig=self.data_config.check_ambig,
                )
            else:
                return randomaccessfile.appender(
                    self.opener, self.index_file, b"w+", self._delay_buffer
                )

    def __index_new_fp(self):
        """internal method to create a new index file for writing

        You should not use this unless you are upgrading from inline revlog
        """
        return self.opener(
            self.index_file,
            mode=b"w",
            checkambig=self.data_config.check_ambig,
        )

    def split_inline(self, tr, header, new_index_file_path=None):
        """split the data of an inline revlog into an index and a data file"""
        assert self._delay_buffer is None
        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with sidedata writing handle as it is only
            # relevant with revlog-v2 which is never inline, not reaching
            # this code

        new_dfh = self.opener(self.data_file, mode=b"w+")
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self.reading():
                for r in range(len(self.index)):
                    new_dfh.write(self.get_segment_for_revs(r, r)[1])
                new_dfh.flush()

            if new_index_file_path is not None:
                self.index_file = new_index_file_path
            with self.__index_new_fp() as fp:
                self.inline = False
                for i in range(len(self.index)):
                    e = self.index.entry_binary(i)
                    if i == 0:
                        packed_header = self.index.pack_header(header)
                        e = packed_header + e
                    fp.write(e)

                # If we don't use side-write, the temp file replaces the
                # real index when we exit the context manager

            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self.data_file,
                self.data_config.chunk_cache_size,
            )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with sidedata writing handle as it is only
                # relevant with revlog-v2 which is never inline, not reaching
                # this code
        finally:
            if new_dfh is not None:
                new_dfh.close()
        return self.index_file

    def get_segment_for_revs(self, startrev, endrev):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.

        API: we should consider making this a private part of the InnerRevlog
        at some point.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self.inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length)

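    # Worked example (illustrative): in an inline revlog, index entries and
    # data chunks are interleaved in the same file, so the physical position
    # of rev ``r``'s data is its logical offset plus ``(r + 1)`` index
    # entries. With 64-byte entries, a chunk whose logical start is 100 for
    # rev 3 actually begins ``100 + 4 * 64 == 356`` bytes into the file.
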
    def _chunk(self, rev):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision number.

        Returns a str holding uncompressed data for the requested revision.
        """
        if self._uncompressed_chunk_cache is not None:
            uncomp = self._uncompressed_chunk_cache.get(rev)
            if uncomp is not None:
                return uncomp

        compression_mode = self.index[rev][10]
        data = self.get_segment_for_revs(rev, rev)[1]
        if compression_mode == COMP_MODE_PLAIN:
            uncomp = data
        elif compression_mode == COMP_MODE_DEFAULT:
            uncomp = self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            uncomp = self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)
        if self._uncompressed_chunk_cache is not None:
            self._uncompressed_chunk_cache.insert(rev, uncomp, cost=len(uncomp))
        return uncomp

    def _chunks(self, revs, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self.inline
        iosize = self.index.entry_size
        buffer = util.buffer

        fetched_revs = []
        fadd = fetched_revs.append

        chunks = []
        ladd = chunks.append

        if self._uncompressed_chunk_cache is None:
            fetched_revs = revs
        else:
            for rev in revs:
                cached_value = self._uncompressed_chunk_cache.get(rev)
                if cached_value is None:
                    fadd(rev)
                else:
                    ladd((rev, cached_value))

        if not fetched_revs:
            slicedchunks = ()
        elif not self.data_config.with_sparse_read:
            slicedchunks = (fetched_revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self,
                fetched_revs,
                targetsize=targetsize,
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self.get_segment_for_revs(firstrev, lastrev)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                for rev in revschunk:
                    ladd((rev, self._chunk(rev)))
                # this run was fully handled by the per-revision fallback,
                # so skip the segment-based decompression below
                continue

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    c = c
                elif comp_mode == COMP_MODE_INLINE:
                    c = decomp(c)
                elif comp_mode == COMP_MODE_DEFAULT:
                    c = def_decomp(c)
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)
                ladd((rev, c))
                if self._uncompressed_chunk_cache is not None:
                    self._uncompressed_chunk_cache.insert(rev, c, len(c))

        chunks.sort()
        return [x[1] for x in chunks]

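    # A minimal usage sketch for ``_chunks`` (``inner``, ``rawsize`` and the
    # revision numbers are hypothetical): callers pass a delta chain in
    # ascending order plus a size hint bounding each sliced read::
    #
    #     chain = [2, 5, 9]  # chain base first, then its deltas
    #     bins = inner._chunks(chain, targetsize=4 * rawsize)
    #     assert len(bins) == len(chain)
    #
    # ``raw_text`` below is the real caller of this method.
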
    def raw_text(self, node, rev):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            cachedrev = self._revisioncache[1]

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._inner._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        if self._uncompressed_chunk_cache is not None:
            # dynamically update the uncompressed_chunk_cache size to the
            # largest revision we saw in this revlog.
            factor = self.data_config.uncompressed_cache_factor
            candidate_size = rawsize * factor
            if candidate_size > self._uncompressed_chunk_cache.maxcost:
                self._uncompressed_chunk_cache.maxcost = candidate_size

        bins = self._chunks(chain, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

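    # Sketch of the reconstruction done by ``raw_text`` (hypothetical chain
    # ``[base, r1, r2]`` ending at the requested revision)::
    #
    #     bins = self._chunks([base, r1, r2], targetsize=targetsize)
    #     rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
    #
    # With a complete chain the first chunk is the full base text and every
    # later chunk is a binary delta applied on top of it; when the chain was
    # stopped at a cached revision, the cached text serves as the base
    # instead.
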
    def sidedata(self, rev, sidedata_end):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self.inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if sidedata_end < sidedata_offset + sidedata_size:
            filename = self.sidedata_file
            end = sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

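    # The compression-mode dispatch above mirrors the one in ``_chunks``:
    #
    #     COMP_MODE_PLAIN   -> chunk stored verbatim, used as-is
    #     COMP_MODE_INLINE  -> per-chunk header, routed to self.decompress
    #     COMP_MODE_DEFAULT -> revlog-wide default, routed to
    #                          self._decompressor
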
    def write_entry(
        self,
        transaction,
        entry,
        data,
        link,
        offset,
        sidedata,
        sidedata_offset,
        index_end,
        data_end,
        sidedata_end,
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if index_end is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(index_end, os.SEEK_SET)
        if dfh:
            if data_end is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(sidedata_end, os.SEEK_SET)

        curr = len(self.index) - 1
        if not self.inline:
            transaction.add(self.data_file, offset)
            if self.sidedata_file:
                transaction.add(self.sidedata_file, sidedata_offset)
            transaction.add(self.canonical_index_file, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            if self._delay_buffer is None:
                ifh.write(entry)
            else:
                self._delay_buffer.append(entry)
        elif self._delay_buffer is not None:
            msg = b'invalid delayed write on inline revlog'
            raise error.ProgrammingError(msg)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self.canonical_index_file, offset)
            assert not sidedata
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
        return (
            ifh.tell(),
            dfh.tell() if dfh else None,
            sdfh.tell() if sdfh else None,
        )

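    # The seek-before-write discipline above, as a standalone sketch
    # (hypothetical ``path``/``known_end``; the method reuses long-lived
    # handles instead of opening its own)::
    #
    #     with open(path, 'r+b') as fh:
    #         fh.seek(known_end, os.SEEK_SET)  # position explicitly,
    #         fh.write(entry)                  # never trust append state
    #
    # Writing at a recorded offset keeps the bytes aligned with what
    # ``transaction.add()`` journaled for rollback.
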
    def _divert_index(self):
        index_file = self.index_file
        # When we encounter a legacy inline changelog being split, it is
        # important to use the expected filename for pending content
        # (<radix>.a); otherwise hooks won't see the content of the
        # pending transaction.
        if index_file.endswith(b'.s'):
            index_file = self.index_file[:-2]
        return index_file + b'.a'

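    # Filename mapping performed by ``_divert_index`` (illustrative names)::
    #
    #     b'00changelog.i'   -> b'00changelog.i.a'
    #     b'00changelog.i.s' -> b'00changelog.i.a'  # legacy split in flight
    #
    # Both cases converge on the ``<radix>.a`` name that hooks expect when
    # reading pending content.
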
    def delay(self):
        assert not self.is_open
        if self.inline:
            msg = "revlog with delayed write should not be inline"
            raise error.ProgrammingError(msg)
        if self._delay_buffer is not None or self._orig_index_file is not None:
            # delay or divert already in place
            return None
        elif len(self.index) == 0:
            self._orig_index_file = self.index_file
            self.index_file = self._divert_index()
            assert self._orig_index_file is not None
            assert self.index_file is not None
            if self.opener.exists(self.index_file):
                self.opener.unlink(self.index_file)
            return self.index_file
        else:
            self._delay_buffer = []
            return None

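    # The two delayed-write strategies selected above, in short:
    #
    #     empty revlog     -> "divert": write straight to the ``.a`` file and
    #                         rename it into place when finalizing
    #     non-empty revlog -> "delay": accumulate new index entries in
    #                         ``_delay_buffer`` until pending/finalize time
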
    def write_pending(self):
        assert not self.is_open
        if self.inline:
            msg = "revlog with delayed write should not be inline"
            raise error.ProgrammingError(msg)
        if self._orig_index_file is not None:
            return None, True
        any_pending = False
        pending_index_file = self._divert_index()
        if self.opener.exists(pending_index_file):
            self.opener.unlink(pending_index_file)
        util.copyfile(
            self.opener.join(self.index_file),
            self.opener.join(pending_index_file),
        )
        if self._delay_buffer:
            with self.opener(pending_index_file, b'r+') as ifh:
                ifh.seek(0, os.SEEK_END)
                ifh.write(b"".join(self._delay_buffer))
            any_pending = True
        self._delay_buffer = None
        self._orig_index_file = self.index_file
        self.index_file = pending_index_file
        return self.index_file, any_pending

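    # Rough lifecycle of a delayed write (method names from this class)::
    #
    #     delay()             # arm buffering (or divert for an empty revlog)
    #     write_entry(...)    # entries land in _delay_buffer
    #     write_pending()     # copy index to <radix>.a and append the buffer
    #     finalize_pending()  # flush the buffer or rename the .a file back
    #
    # Hooks that run mid-transaction read the ``.a`` file, so they observe
    # pending revisions before the transaction commits.
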
    def finalize_pending(self):
        assert not self.is_open
        if self.inline:
            msg = "revlog with delayed write should not be inline"
            raise error.ProgrammingError(msg)

        delay = self._delay_buffer is not None
        divert = self._orig_index_file is not None

        if delay and divert:
            assert False, "unreachable"
        elif delay:
            if self._delay_buffer:
                with self.opener(self.index_file, b'r+') as ifh:
                    ifh.seek(0, os.SEEK_END)
                    ifh.write(b"".join(self._delay_buffer))
            self._delay_buffer = None
        elif divert:
            if self.opener.exists(self.index_file):
                self.opener.rename(
                    self.index_file,
                    self._orig_index_file,
                    checkambig=True,
                )
            self.index_file = self._orig_index_file
            self._orig_index_file = None
        else:
            msg = b"neither delay nor divert found on this revlog"
            raise error.ProgrammingError(msg)
        return self.canonical_index_file


class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

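    # Informal shape of a v1 index record described above (the authoritative
    # layout lives in mercurial/revlogutils/constants.py; this sketch is an
    # illustration, not the normative definition):
    #
    #     6 bytes   offset of the chunk in the data file
    #     2 bytes   flags
    #     4 bytes   compressed chunk length
    #     4 bytes   uncompressed length
    #     4 bytes   base revision of the delta chain
    #     4 bytes   link revision
    #     4 bytes   first parent revision
    #     4 bytes   second parent revision
    #     32 bytes  nodeid (20-byte hash, zero padded)
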
    _flagserrorclass = error.RevlogError

    @staticmethod
    def is_inline_index(header_bytes):
        """Determine if a revlog is inline from the initial bytes of the index"""
        if len(header_bytes) == 0:
            return True

        header = INDEX_HEADER.unpack(header_bytes)[0]

        _format_flags = header & ~0xFFFF
        _format_version = header & 0xFFFF

        features = FEATURES_BY_VERSION[_format_version]
        return features[b'inline'](_format_flags)

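    # The 32-bit header word splits into a 16-bit flags half and a 16-bit
    # version half, e.g. with the constants imported at the top of this
    # module::
    #
    #     header = REVLOGV1 | FLAG_INLINE_DATA
    #     header & 0xFFFF   # -> REVLOGV1, the format version
    #     header & ~0xFFFF  # -> FLAG_INLINE_DATA, the format flags
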
    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        try_split=False,
        canonical_parent_order=True,
        data_config=None,
        delta_config=None,
        feature_config=None,
        may_inline=True,  # may inline new revlog
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must be reliably set by normal
        code, but that test, debug, or performance measurement code might
        not set this to an accurate value.
        """

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self._try_split = try_split
        self._may_inline = may_inline
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        if feature_config is not None:
            self.feature_config = feature_config.copy()
        elif b'feature-config' in self.opener.options:
            self.feature_config = self.opener.options[b'feature-config'].copy()
        else:
            self.feature_config = FeatureConfig()
        self.feature_config.censorable = censorable
        self.feature_config.canonical_parent_order = canonical_parent_order
        if data_config is not None:
            self.data_config = data_config.copy()
        elif b'data-config' in self.opener.options:
            self.data_config = self.opener.options[b'data-config'].copy()
        else:
            self.data_config = DataConfig()
        self.data_config.check_ambig = checkambig
        self.data_config.mmap_large_index = mmaplargeindex
        if delta_config is not None:
            self.delta_config = delta_config.copy()
        elif b'delta-config' in self.opener.options:
            self.delta_config = self.opener.options[b'delta-config'].copy()
        else:
            self.delta_config = DeltaConfig()
        self.delta_config.upper_bound_comp = upperboundcomp

        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)

        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}

        # other optional features

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)
        # prevent nesting of addgroup
        self._adding_group = None

        chunk_cache = self._loadindex()
        self._load_inner(chunk_cache)
        self._concurrencychecker = concurrencychecker

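    # Each config object above resolves through the same fallback chain:
    # explicit argument, then ``opener.options``, then built-in defaults.
    # A hypothetical caller (``path`` and ``my_data_config`` are made up)::
    #
    #     rl = revlog(
    #         opener,
    #         (KIND_FILELOG, path),
    #         radix,
    #         data_config=my_data_config,  # wins over opener.options
    #     )
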
    def _init_opts(self):
        """process options (from above/config) to set up associated default revlog mode

        These values might be affected when actually reading on disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
            compute_rank = opts.get(b'changelogv2.compute-rank', True)
            self.feature_config.compute_rank = compute_rank
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1
            if self._may_inline:
                new_header |= FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        mmapindexthreshold = None
        if self.data_config.mmap_large_index:
            mmapindexthreshold = self.data_config.mmap_index_threshold
        if self.feature_config.enable_ellipsis:
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        chunk_cache_size = self.data_config.chunk_cache_size
        if chunk_cache_size <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % chunk_cache_size
            )
        elif chunk_cache_size & (chunk_cache_size - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % chunk_cache_size
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

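    # The power-of-two check above uses a standard bit trick: for n > 0,
    # ``n & (n - 1)`` clears the lowest set bit, so the result is zero
    # exactly when n has a single bit set::
    #
    #     65536 & 65535  # -> 0, accepted
    #     65537 & 65536  # -> 65536 (truthy), rejected
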
    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''

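    # Typical call, as made by ``_loadindex`` below (the threshold value is
    # hypothetical)::
    #
    #     entry_data = self._get_data(entry_point, 1024 * 1024)
    #
    # Files at or above the threshold are mmapped; smaller ones are read
    # into memory wholesale.
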
    def get_streams(self, max_linkrev, force_inline=False):
        """return a list of streams that represent this revlog

        This is used by stream-clone to do bytes-to-bytes copies of a
        repository.

        This streams data for all revisions that refer to a changelog revision up
        to `max_linkrev`.

        If `force_inline` is set, it enforces that the stream will represent an inline revlog.

        It returns a list of three-tuples:

            [
                (filename, bytes_stream, stream_size),
                …
            ]
        """
        n = len(self)
        index = self.index
        while n > 0:
            linkrev = index[n - 1][4]
            if linkrev < max_linkrev:
                break
            # note: this loop will rarely go through multiple iterations, since
            # it only traverses commits created during the current streaming
            # pull operation.
            #
            # If this becomes a problem, using a binary search should cap the
            # runtime of this.
            n = n - 1
        if n == 0:
            # no data to send
            return []
        index_size = n * index.entry_size
        data_size = self.end(n - 1)

        # XXX we might have been split (or stripped) since the object
        # initialization. We need to close this race too, e.g. by having a
        # way to pre-open the files we feed to the revlog and never closing
        # them before we are done streaming.

        if self._inline:

            def get_stream():
                with self.opener(self._indexfile, mode=b"r") as fp:
                    yield None
                    size = index_size + data_size
                    if size <= 65536:
                        yield fp.read(size)
                    else:
                        yield from util.filechunkiter(fp, limit=size)

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        elif force_inline:

            def get_stream():
                with self.reading():
                    yield None

                    for rev in range(n):
                        idx = self.index.entry_binary(rev)
                        if rev == 0 and self._docket is None:
                            # re-inject the inline flag
                            header = self._format_flags
                            header |= self._format_version
                            header |= FLAG_INLINE_DATA
                            header = self.index.pack_header(header)
                            idx = header + idx
                        yield idx
                        yield self._inner.get_segment_for_revs(rev, rev)[1]

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        else:

            def get_index_stream():
                with self.opener(self._indexfile, mode=b"r") as fp:
                    yield None
                    if index_size <= 65536:
                        yield fp.read(index_size)
                    else:
                        yield from util.filechunkiter(fp, limit=index_size)

            def get_data_stream():
                with self._datafp() as fp:
                    yield None
                    if data_size <= 65536:
                        yield fp.read(data_size)
                    else:
                        yield from util.filechunkiter(fp, limit=data_size)

            index_stream = get_index_stream()
            next(index_stream)
            data_stream = get_data_stream()
            next(data_stream)
            return [
                (self._datafile, data_stream, data_size),
                (self._indexfile, index_stream, index_size),
            ]

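    # Consuming the streams on the receiving side (sketch; ``out_vfs`` is a
    # hypothetical destination opener)::
    #
    #     for name, stream, size in rl.get_streams(max_linkrev):
    #         with out_vfs(name, b'wb') as fp:
    #             for chunk in stream:
    #                 fp.write(chunk)
    #
    # Each generator is primed with ``next()`` above, so the source file is
    # already open before the first byte is requested.
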
    def _loadindex(self, docket=None):
        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        elif self._try_split and self.opener.exists(self._split_index_file):
            entry_point = self._split_index_file
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self.delta_config.general_delta = features[b'generaldelta'](
                self._format_flags
            )
            self.feature_config.has_side_data = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self.delta_config.general_delta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self.delta_config.general_delta:
            self.delta_config.sparse_revlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None and self._nodemap_file is not None:
            # we would like to use the rust_index in all cases, especially
            # because it is necessary for AncestorsIterator and LazyAncestors
            # since the 6.7 cycle.
            #
            # However, the performance impact of unconditionally building the
            # nodemap is currently a problem for non-persistent nodemap
            # repositories.
            use_rust_index = True

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = functools.partial(
                parse_index_v1_rust, default_header=new_header
            )
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and hasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)

        return chunkcache

    def _load_inner(self, chunk_cache):
        if self._docket is None:
            default_compression_header = None
        else:
            default_compression_header = self._docket.default_compression_header

        self._inner = _InnerRevlog(
            opener=self.opener,
            index=self.index,
            index_file=self._indexfile,
            data_file=self._datafile,
            sidedata_file=self._sidedatafile,
            inline=self._inline,
            data_config=self.data_config,
            delta_config=self.delta_config,
            feature_config=self.feature_config,
            chunk_cache=chunk_cache,
            default_compression_header=default_compression_header,
        )

    def get_revlog(self):
        """simple function to mirror the API of other not-really-revlog classes"""
        return self

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        if self.revlog_kind == KIND_FILELOG:
            # Reference the file without the "data/" prefix, so it is familiar
            # to the user.
            return self.target[1]
        else:
            return self.radix

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def _candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

1814 def update_caches(self, transaction):
1821 def update_caches(self, transaction):
1815 """update on disk cache
1822 """update on disk cache
1816
1823
1817 If a transaction is passed, the update may be delayed to transaction
1824 If a transaction is passed, the update may be delayed to transaction
1818 commit."""
1825 commit."""
1819 if self._nodemap_file is not None:
1826 if self._nodemap_file is not None:
1820 if transaction is None:
1827 if transaction is None:
1821 nodemaputil.update_persistent_nodemap(self)
1828 nodemaputil.update_persistent_nodemap(self)
1822 else:
1829 else:
1823 nodemaputil.setup_persistent_nodemap(transaction, self)
1830 nodemaputil.setup_persistent_nodemap(transaction, self)
1824
1831
    def clearcaches(self):
        """Clear in-memory caches"""
        self._chainbasecache.clear()
        self._inner.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The Python code is the one responsible for validating the docket,
        # so we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and hasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        """return the revision number associated with a <nodeid>"""
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

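    # Lookup contract sketch (editor's illustration; `rl` and `some_node`
    # are hypothetical): a stored 20-byte node maps back to its revision
    # number, an unknown node raises LookupError, and the virtual
    # working-directory id raises WdirUnsupported rather than pretending to
    # be stored.
    #
    #   r = rl.rev(some_node)            # rl.node(r) == some_node
    #   rl.rev(b'\x01' * 20)             # raises error.LookupError if absent
    #   rl.rev(rl.nodeconstants.wdirid)  # raises error.WdirUnsupported
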
    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset. (instead of previous-offset +
        # previous-size)
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

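    # Worked example of the packing described above (editor's illustration):
    # with a data offset of 0x1234 and flags 0x0001, the stored first index
    # field is (0x1234 << 16) | 0x0001.
    #
    #   offset_flags = (0x1234 << 16) | 0x0001
    #   assert offset_flags >> 16 == 0x1234     # what start() extracts
    #   assert offset_flags & 0xFFFF == 0x0001  # what flags() extracts
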
    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.feature_config.has_side_data:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank

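    # Worked example of the definition above (editor's illustration): in a
    # linear history 0 <- 1 <- 2, `ancestors(2)` with 2 itself included is
    # {0, 1, 2}, so the rank of revision 2 is 3. In general:
    #
    #   rank(r) == len(ancestors(r) | {r})
    #
    # On formats other than CHANGELOGV2 the rank is not persisted and
    # fast_rank() simply returns None instead of computing it.
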
    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        if self.feature_config.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if self.feature_config.canonical_parent_order and d[5] == self.nullid:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self.delta_config.general_delta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        return self._inner._deltachain(rev, stoprev=stoprev)

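    # Chain-walk sketch (editor's illustration of the loop above): with
    # general delta, an entry's base field (e[3]) names its delta parent, so
    # the walk hops base-to-base; without it, every delta is against the
    # previous revision, so the walk just decrements. Either way the chain
    # ends at an entry that is its own base, i.e. a full snapshot.
    #
    #   general delta:  rev -> e[3] -> e[3] -> ... -> base (its own e[3])
    #   legacy layout:  rev -> rev - 1 -> rev - 2 -> ... -> base
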
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

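    # Worked example of the revset identity above (editor's illustration):
    # on the DAG 0 <- 1 <- 2 and 1 <- 3, with common = [node(1)] and
    # heads = [node(2), node(3)], `has` lazily covers {nullrev, 0, 1} and
    # the second element is [node(2), node(3)], sorted by revision number
    # and therefore topologically.
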
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

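    # Worked example (editor's illustration): on the linear history
    # 0 <- 1 <- 2, nodesbetween([node(1)], [node(2)]) returns
    # ([node(1), node(2)], [node(1)], [node(2)]): the topological path
    # itself, the roots that actually reach a head, and the heads actually
    # reachable from a root.
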
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def headrevsdiff(self, start, stop):
        try:
            return self.index.headrevsdiff(start, stop)
        except AttributeError:
            return dagop.headrevsdiff(self._uncheckedparentrevs, start, stop)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered revs so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

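    # How the marking pass above works (editor's note): every unfiltered
    # revision is first assumed to be a head, then the bit is cleared for
    # each revision's parents. The list is one slot longer than the revlog
    # so that writes for a nullrev (-1) parent land in ishead[-1], a slot
    # that is never set to 1 and therefore never reported.
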
    def _head_node_ids(self):
        try:
            return self.index.head_node_ids()
        except AttributeError:
            return [self.node(r) for r in self.headrevs()]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return self._head_node_ids()
        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def diffheads(self, start, stop):
        """return the nodes that make up the difference between
        heads of revs before `start` and heads of revs before `stop`"""
        removed, added = self.headrevsdiff(start, stop)
        return [self.node(r) for r in removed], [self.node(r) for r in added]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

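    # Why the `a > b` shortcut in isancestorrev() is sound (editor's note):
    # revision numbers are assigned in topological order, so an ancestor
    # always has a lower number than its descendants. A revision with a
    # higher number therefore can never be an ancestor of a lower one, and
    # the expensive reachableroots() walk is only needed when a < b.
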
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass

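    # The forms _match() resolves, in order (editor's sketch; `rl` is a
    # hypothetical open revlog):
    #
    #   rl._match(5)             # int: taken as a revision number
    #   rl._match(b'\x12' * 20)  # nodelen bytes: tried as a binary node
    #   rl._match(b'-1')         # str(rev), negative values count from tip
    #   rl._match(b'ab' * 20)    # 2 * nodelen chars: tried as full hex id
    #
    # Anything that matches none of these falls through to None and is
    # handled by _partialmatch() below.
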
    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if not (id[-1] in hexdigits):
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

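    # shortest() sketch (editor's illustration): for a node whose hex form
    # starts b'ab12...', minlength=1 tries b'a', b'ab', b'ab1', ... until
    # the prefix matches exactly one stored node; disambiguate() then only
    # lengthens it further while it is all 'f's and could still be confused
    # with the virtual working-directory id.
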
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

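    # Design note (editor's): cmp() never reads the stored text back; it
    # recomputes the hash that defines the node identity from the candidate
    # text plus the stored parents and compares node ids. Roughly, and
    # assuming hashrevisionsha1 hashes the sorted parents followed by the
    # text:
    #
    #   node == sha1(min(p1, p2) + max(p1, p2) + text)
    #
    # so a hash mismatch is exactly a content (or parent) difference.
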
    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self.delta_config.general_delta:
            return base
        else:
            return rev - 1

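    # deltaparent() semantics at a glance (editor's sketch):
    #
    #   full snapshot (base == rev):  deltaparent(rev) == nullrev
    #   general delta:                deltaparent(rev) == index[rev][3]
    #   legacy layout:                deltaparent(rev) == rev - 1
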
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        ret = self._inner.issnapshot(rev)
        # cache the inner implementation on the instance for later calls
        self.issnapshot = self._inner.issnapshot
        return ret

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._inner._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._inner._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def revision(self, nodeorrev):
        """return an uncompressed revision of a given node or revision
        number.
        """
        return self._revisiondata(nodeorrev)

    def sidedata(self, nodeorrev):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _rawtext(self, node, rev):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """
        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._inner._revisioncache:
            if self._inner._revisioncache[0] == node:
                return (rev, self._inner._revisioncache[2], True)

        if rev is None:
            rev = self.rev(node)

        return self._inner.raw_text(node, rev)

    def _revisiondata(self, nodeorrev, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev)

        if raw and validated:
            # if we don't need to process the raw text and the raw text is
            # already validated, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._inner._revisioncache = (node, rev, rawtext)

        return text

    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        sidedata_end = None
        if self._docket is not None:
            sidedata_end = self._docket.sidedata_end
        return self._inner.sidedata(rev, sidedata_end)

    def rawdata(self, nodeorrev):
        """return the uncompressed raw data of a given node or revision number."""
        return self._revisiondata(nodeorrev, raw=True)

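    # A hedged note with an example: ``revision()`` returns the text after
    # flag processors have run, while ``rawdata()`` returns the bytes as
    # stored. For a revision without special flags the two are identical.
    # ``rl`` and ``rev`` are placeholder names.
    #
    #     if rl.flags(rev) == REVIDX_DEFAULT_FLAGS:
    #         assert rl.revision(rev) == rl.rawdata(rev)
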
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

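    # Illustrative sketch (not part of the original module) of what
    # ``storageutil.hashrevisionsha1`` computes for the default SHA-1 nodes:
    # the two parent nodes are sorted, concatenated and hashed with the text.
    #
    #     import hashlib
    #     a, b = sorted([p1, p2])
    #     assert node == hashlib.sha1(a + b + text).digest()
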
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if (
                    self._inner._revisioncache
                    and self._inner._revisioncache[0] == node
                ):
                    self._inner._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self.feature_config.censorable and storageutil.iscensoredtext(
                text
            ):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    @property
    def _split_index_file(self):
        """the path at which to expect the index of an ongoing splitting operation

        The file will only exist if a splitting operation is in progress, but
        it is always expected at the same location."""
        parts = self.radix.split(b'/')
        if len(parts) > 1:
            # add a '-s' suffix to the ``data/`` or ``meta/`` base directory
            head = parts[0] + b'-s'
            mids = parts[1:-1]
            tail = parts[-1] + b'.i'
            pieces = [head] + mids + [tail]
            return b'/'.join(pieces)
        else:
            # the revlog is stored at the root of the store (changelog or
            # manifest), no risk of collision.
            return self.radix + b'.i.s'

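    # Worked examples derived from the logic above (placeholder names, not
    # part of the original module):
    #
    #     radix b'data/foo/bar'  ->  b'data-s/foo/bar.i'
    #     radix b'00changelog'   ->  b'00changelog.i.s'
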
    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or (self._may_inline and total_size < _maxinline):
            return

        if self._docket is not None:
            msg = b"inline revlog should not have a docket"
            raise error.ProgrammingError(msg)

        # In the common case, we enforce the inline size because the revlog
        # has been appended to, and in that case it must have an initial
        # offset recorded in the transaction.
        troffset = tr.findoffset(self._inner.canonical_index_file)
        pre_touched = troffset is not None
        if not pre_touched and self.target[0] != KIND_CHANGELOG:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )

        tr.addbackup(self._inner.canonical_index_file, for_offset=pre_touched)
        tr.add(self._datafile, 0)

        old_index_file_path = self._indexfile
        new_index_file_path = self._split_index_file
        opener = self.opener
        weak_self = weakref.ref(self)

        # the "split" index replaces the real index when the transaction is
        # finalized
        def finalize_callback(tr):
            opener.rename(
                new_index_file_path,
                old_index_file_path,
                checkambig=True,
            )
            maybe_self = weak_self()
            if maybe_self is not None:
                maybe_self._indexfile = old_index_file_path
                maybe_self._inner.index_file = maybe_self._indexfile

        def abort_callback(tr):
            maybe_self = weak_self()
            if maybe_self is not None:
                maybe_self._indexfile = old_index_file_path
                maybe_self._inner.inline = True
                maybe_self._inner.index_file = old_index_file_path

        tr.registertmp(new_index_file_path)
        # we use 001 here to make sure this happens after the finalization of
        # any pending changelog write (which uses 000). Otherwise the two
        # finalizers would step over each other and delete the changelog.i
        # file.
        if self.target[1] is not None:
            callback_id = b'001-revlog-split-%d-%s' % self.target
        else:
            callback_id = b'001-revlog-split-%d' % self.target[0]
        tr.addfinalize(callback_id, finalize_callback)
        tr.addabort(callback_id, abort_callback)

        self._format_flags &= ~FLAG_INLINE_DATA
        self._inner.split_inline(
            tr,
            self._format_flags | self._format_version,
            new_index_file_path=new_index_file_path,
        )

        self._inline = False
        if new_index_file_path is not None:
            self._indexfile = new_index_file_path

        nodemaputil.setup_persistent_nodemap(tr, self)

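    # A hedged sketch of the on-disk change performed above (not part of the
    # original module): an inline revlog interleaves index entries and
    # revision data in the single ``.i`` file; after the split, the ``.i``
    # file holds fixed-size index entries only and the data moves to ``.d``.
    #
    #     before:  foo.i = [entry0][data0][entry1][data1]...
    #     after:   foo.i = [entry0][entry1]...    foo.d = [data0][data1]...
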
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        with self._inner.reading():
            yield

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._inner.is_writing:
            yield
        else:
            data_end = None
            sidedata_end = None
            if self._docket is not None:
                data_end = self._docket.data_end
                sidedata_end = self._docket.sidedata_end
            with self._inner.writing(
                transaction,
                data_end=data_end,
                sidedata_end=sidedata_end,
            ):
                yield
                if self._docket is not None:
                    self._write_docket(transaction)

    @property
    def is_delaying(self):
        return self._inner.is_delaying

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement its transaction
        logic.

        We could also imagine using the same transaction logic for all
        revlogs, since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.feature_config.has_side_data:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

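    # A minimal usage sketch (hypothetical, not part of the original module):
    # callers are expected to hold the repository lock and an open
    # transaction. ``rl`` stands for a revlog instance here.
    #
    #     with repo.lock():
    #         with repo.transaction(b'example') as tr:
    #             rev = rl.addrevision(text, tr, linkrev, p1node, p2node)
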
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        return self._inner.compress(data)

    def decompress(self, data):
        return self._inner.decompress(data)

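    # Round-trip sketch (hedged, not part of the original module):
    # ``compress`` returns a (header, data) pair whose concatenation is the
    # chunk as stored, and ``decompress`` accepts such a stored chunk.
    #
    #     header, packed = rl.compress(rawtext)
    #     assert rl.decompress(header + packed) == rawtext
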
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._inner._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._inner._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # we need the rawtext size before it is changed by flag
            # processors, which is the non-raw size. use revlog explicitly to
            # avoid filelog's extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self.delta_config.debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        if cachedelta is not None and len(cachedelta) == 2:
            # If the cached delta has no information about how it should be
            # reused, add the default reuse instruction according to the
            # revlog's configuration.
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse = DELTA_BASE_REUSE_NO
            cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.feature_config.has_side_data:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self._inner.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty; that way we can
            # easily detect empty sidedata, and it will be no different from
            # the ones we manually add.
            sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self.feature_config.compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._inner._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.

        Versions < 2 of the revlog can get this in O(1); revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self,
        transaction,
        entry,
        data,
        link,
        offset,
        sidedata,
        sidedata_offset,
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        index_end = data_end = sidedata_end = None
        if self._docket is not None:
            index_end = self._docket.index_end
            data_end = self._docket.data_end
            sidedata_end = self._docket.sidedata_end

        files_end = self._inner.write_entry(
            transaction,
            entry,
            data,
            link,
            offset,
            sidedata,
            sidedata_offset,
            index_end,
            data_end,
            sidedata_end,
        )
        self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = files_end[0]
            self._docket.data_end = files_end[1]
            self._docket.sidedata_end = files_end[2]

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

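        # Each element of ``deltas`` unpacks, as the loop below shows, into:
        #
        #     (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
        #
        # where ``delta`` is a binary patch against the revision identified
        # by ``deltabase``.
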
        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self.delta_config.debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self.feature_config.censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self.feature_config.censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment.
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._chaininfocache = util.lrucachedict(500)
        self._inner.clear_cache()

        del self.index[rev:-1]

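    # Worked example of the truncation arithmetic above (hypothetical
    # numbers, not part of the original module): with 64-byte index entries,
    # stripping at rev 10 of a non-inline revlog truncates the index to
    # 10 * 64 = 640 bytes and the data file to ``self.start(10)``; for an
    # inline revlog both figures fold into a single ``.i`` offset:
    #
    #     end = self.start(10) + 10 * self.index.entry_size
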
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

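    # Interpreting the result (illustrative, not part of the original
    # module):
    #
    #     dd, di = rl.checksize()
    #     assert (dd, di) == (0, 0)  # anything else means trailing garbage,
    #                                # e.g. left over by an interrupted write
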
    def files(self):
        """return list of files that compose this revlog"""
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

3609 def emitrevisions(
3616 def emitrevisions(
3610 self,
3617 self,
3611 nodes,
3618 nodes,
3612 nodesorder=None,
3619 nodesorder=None,
3613 revisiondata=False,
3620 revisiondata=False,
3614 assumehaveparentrevisions=False,
3621 assumehaveparentrevisions=False,
3615 deltamode=repository.CG_DELTAMODE_STD,
3622 deltamode=repository.CG_DELTAMODE_STD,
3616 sidedata_helpers=None,
3623 sidedata_helpers=None,
3617 debug_info=None,
3624 debug_info=None,
3618 ):
3625 ):
3619 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3626 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3620 raise error.ProgrammingError(
3627 raise error.ProgrammingError(
3621 b'unhandled value for nodesorder: %s' % nodesorder
3628 b'unhandled value for nodesorder: %s' % nodesorder
3622 )
3629 )
3623
3630
3624 if nodesorder is None and not self.delta_config.general_delta:
3631 if nodesorder is None and not self.delta_config.general_delta:
3625 nodesorder = b'storage'
3632 nodesorder = b'storage'
3626
3633
3627 if (
3634 if (
3628 not self._storedeltachains
3635 not self._storedeltachains
3629 and deltamode != repository.CG_DELTAMODE_PREV
3636 and deltamode != repository.CG_DELTAMODE_PREV
3630 ):
3637 ):
3631 deltamode = repository.CG_DELTAMODE_FULL
3638 deltamode = repository.CG_DELTAMODE_FULL
3632
3639
3633 return storageutil.emitrevisions(
3640 return storageutil.emitrevisions(
3634 self,
3641 self,
3635 nodes,
3642 nodes,
3636 nodesorder,
3643 nodesorder,
3637 revlogrevisiondelta,
3644 revlogrevisiondelta,
3638 deltaparentfn=self.deltaparent,
3645 deltaparentfn=self.deltaparent,
3639 candeltafn=self._candelta,
3646 candeltafn=self._candelta,
3640 rawsizefn=self.rawsize,
3647 rawsizefn=self.rawsize,
3641 revdifffn=self.revdiff,
3648 revdifffn=self.revdiff,
3642 flagsfn=self.flags,
3649 flagsfn=self.flags,
3643 deltamode=deltamode,
3650 deltamode=deltamode,
3644 revisiondata=revisiondata,
3651 revisiondata=revisiondata,
3645 assumehaveparentrevisions=assumehaveparentrevisions,
3652 assumehaveparentrevisions=assumehaveparentrevisions,
3646 sidedata_helpers=sidedata_helpers,
3653 sidedata_helpers=sidedata_helpers,
3647 debug_info=debug_info,
3654 debug_info=debug_info,
3648 )
3655 )
3649
3656
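# --- editor's note: a hedged usage sketch, not part of revlog.py ---
# Walking a set of nodes in storage order with revision data attached.
# Depending on the effective deltamode, each emitted
# revlogrevisiondelta is assumed to carry either a full ``revision``
# text or a ``delta`` against ``basenode``.
def iter_revision_payloads(rlog, nodes):
    for rev_delta in rlog.emitrevisions(
        nodes, nodesorder=b'storage', revisiondata=True
    ):
        payload = rev_delta.revision
        if payload is None:
            payload = rev_delta.delta
        yield rev_delta.node, payload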
3650 DELTAREUSEALWAYS = b'always'
3657 DELTAREUSEALWAYS = b'always'
3651 DELTAREUSESAMEREVS = b'samerevs'
3658 DELTAREUSESAMEREVS = b'samerevs'
3652 DELTAREUSENEVER = b'never'
3659 DELTAREUSENEVER = b'never'
3653
3660
3654 DELTAREUSEFULLADD = b'fulladd'
3661 DELTAREUSEFULLADD = b'fulladd'
3655
3662
3656 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3663 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3657
3664
3658 def clone(
3665 def clone(
3659 self,
3666 self,
3660 tr,
3667 tr,
3661 destrevlog,
3668 destrevlog,
3662 addrevisioncb=None,
3669 addrevisioncb=None,
3663 deltareuse=DELTAREUSESAMEREVS,
3670 deltareuse=DELTAREUSESAMEREVS,
3664 forcedeltabothparents=None,
3671 forcedeltabothparents=None,
3665 sidedata_helpers=None,
3672 sidedata_helpers=None,
3666 ):
3673 ):
3667 """Copy this revlog to another, possibly with format changes.
3674 """Copy this revlog to another, possibly with format changes.
3668
3675
3669 The destination revlog will contain the same revisions and nodes.
3676 The destination revlog will contain the same revisions and nodes.
3670 However, it may not be bit-for-bit identical due to e.g. delta encoding
3677 However, it may not be bit-for-bit identical due to e.g. delta encoding
3671 differences.
3678 differences.
3672
3679
3673 The ``deltareuse`` argument controls how deltas from the existing revlog
3680 The ``deltareuse`` argument controls how deltas from the existing revlog
3674 are preserved in the destination revlog. The argument can have the
3681 are preserved in the destination revlog. The argument can have the
3675 following values:
3682 following values:
3676
3683
3677 DELTAREUSEALWAYS
3684 DELTAREUSEALWAYS
3678 Deltas will always be reused (if possible), even if the destination
3685 Deltas will always be reused (if possible), even if the destination
3679 revlog would not select the same revisions for the delta. This is the
3686 revlog would not select the same revisions for the delta. This is the
3680 fastest mode of operation.
3687 fastest mode of operation.
3681 DELTAREUSESAMEREVS
3688 DELTAREUSESAMEREVS
3682 Deltas will be reused if the destination revlog would pick the same
3689 Deltas will be reused if the destination revlog would pick the same
3683 revisions for the delta. This mode strikes a balance between speed
3690 revisions for the delta. This mode strikes a balance between speed
3684 and optimization.
3691 and optimization.
3685 DELTAREUSENEVER
3692 DELTAREUSENEVER
3686 Deltas will never be reused. This is the slowest mode of execution.
3693 Deltas will never be reused. This is the slowest mode of execution.
3687 This mode can be used to recompute deltas (e.g. if the diff/delta
3694 This mode can be used to recompute deltas (e.g. if the diff/delta
3688 algorithm changes).
3695 algorithm changes).
3689 DELTAREUSEFULLADD
3696 DELTAREUSEFULLADD
3690 Revisions will be re-added as if they were new content. This is
3697 Revisions will be re-added as if they were new content. This is
3691 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3698 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3692 e.g. large file detection and handling.
3699 e.g. large file detection and handling.
3693
3700
3694 Delta computation can be slow, so the choice of delta reuse policy can
3701 Delta computation can be slow, so the choice of delta reuse policy can
3695 significantly affect run time.
3702 significantly affect run time.
3696
3703
3697 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3704 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3698 two extremes. Deltas will be reused if they are appropriate. But if the
3705 two extremes. Deltas will be reused if they are appropriate. But if the
3699 delta could choose a better revision, it will do so. This means if you
3706 delta could choose a better revision, it will do so. This means if you
3700 are converting a non-generaldelta revlog to a generaldelta revlog,
3707 are converting a non-generaldelta revlog to a generaldelta revlog,
3701 deltas will be recomputed if the delta's parent isn't a parent of the
3708 deltas will be recomputed if the delta's parent isn't a parent of the
3702 revision.
3709 revision.
3703
3710
3704 In addition to the delta policy, the ``forcedeltabothparents``
3711 In addition to the delta policy, the ``forcedeltabothparents``
3705 argument controls whether to force compute deltas against both parents
3712 argument controls whether to force compute deltas against both parents
3706 for merges. If unset, the destination revlog's current setting is used.
3713 for merges. If unset, the destination revlog's current setting is used.
3707
3714
3708 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3715 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3709 `sidedata_helpers`.
3716 `sidedata_helpers`.
3710 """
3717 """
3711 if deltareuse not in self.DELTAREUSEALL:
3718 if deltareuse not in self.DELTAREUSEALL:
3712 raise ValueError(
3719 raise ValueError(
3713 _(b'value for deltareuse invalid: %s') % deltareuse
3720 _(b'value for deltareuse invalid: %s') % deltareuse
3714 )
3721 )
3715
3722
3716 if len(destrevlog):
3723 if len(destrevlog):
3717 raise ValueError(_(b'destination revlog is not empty'))
3724 raise ValueError(_(b'destination revlog is not empty'))
3718
3725
3719 if getattr(self, 'filteredrevs', None):
3726 if getattr(self, 'filteredrevs', None):
3720 raise ValueError(_(b'source revlog has filtered revisions'))
3727 raise ValueError(_(b'source revlog has filtered revisions'))
3721 if getattr(destrevlog, 'filteredrevs', None):
3728 if getattr(destrevlog, 'filteredrevs', None):
3722 raise ValueError(_(b'destination revlog has filtered revisions'))
3729 raise ValueError(_(b'destination revlog has filtered revisions'))
3723
3730
3724 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3731 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3725 # if possible.
3732 # if possible.
3726 old_delta_config = destrevlog.delta_config
3733 old_delta_config = destrevlog.delta_config
3727 destrevlog.delta_config = destrevlog.delta_config.copy()
3734 destrevlog.delta_config = destrevlog.delta_config.copy()
3728
3735
3729 try:
3736 try:
3730 if deltareuse == self.DELTAREUSEALWAYS:
3737 if deltareuse == self.DELTAREUSEALWAYS:
3731 destrevlog.delta_config.lazy_delta_base = True
3738 destrevlog.delta_config.lazy_delta_base = True
3732 destrevlog.delta_config.lazy_delta = True
3739 destrevlog.delta_config.lazy_delta = True
3733 elif deltareuse == self.DELTAREUSESAMEREVS:
3740 elif deltareuse == self.DELTAREUSESAMEREVS:
3734 destrevlog.delta_config.lazy_delta_base = False
3741 destrevlog.delta_config.lazy_delta_base = False
3735 destrevlog.delta_config.lazy_delta = True
3742 destrevlog.delta_config.lazy_delta = True
3736 elif deltareuse == self.DELTAREUSENEVER:
3743 elif deltareuse == self.DELTAREUSENEVER:
3737 destrevlog.delta_config.lazy_delta_base = False
3744 destrevlog.delta_config.lazy_delta_base = False
3738 destrevlog.delta_config.lazy_delta = False
3745 destrevlog.delta_config.lazy_delta = False
3739
3746
3740 delta_both_parents = (
3747 delta_both_parents = (
3741 forcedeltabothparents or old_delta_config.delta_both_parents
3748 forcedeltabothparents or old_delta_config.delta_both_parents
3742 )
3749 )
3743 destrevlog.delta_config.delta_both_parents = delta_both_parents
3750 destrevlog.delta_config.delta_both_parents = delta_both_parents
3744
3751
3745 with self.reading(), destrevlog._writing(tr):
3752 with self.reading(), destrevlog._writing(tr):
3746 self._clone(
3753 self._clone(
3747 tr,
3754 tr,
3748 destrevlog,
3755 destrevlog,
3749 addrevisioncb,
3756 addrevisioncb,
3750 deltareuse,
3757 deltareuse,
3751 forcedeltabothparents,
3758 forcedeltabothparents,
3752 sidedata_helpers,
3759 sidedata_helpers,
3753 )
3760 )
3754
3761
3755 finally:
3762 finally:
3756 destrevlog.delta_config = old_delta_config
3763 destrevlog.delta_config = old_delta_config
3757
3764
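# --- editor's note: a hedged usage sketch, not part of revlog.py ---
# Forcing every delta to be recomputed while cloning, e.g. after a
# change to the delta algorithm. ``tr`` is assumed to be an open
# transaction and ``dest`` an empty destination revlog.
def clone_with_fresh_deltas(src, dest, tr):
    src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)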
3758 def _clone(
3765 def _clone(
3759 self,
3766 self,
3760 tr,
3767 tr,
3761 destrevlog,
3768 destrevlog,
3762 addrevisioncb,
3769 addrevisioncb,
3763 deltareuse,
3770 deltareuse,
3764 forcedeltabothparents,
3771 forcedeltabothparents,
3765 sidedata_helpers,
3772 sidedata_helpers,
3766 ):
3773 ):
3767 """perform the core duty of `revlog.clone` after parameter processing"""
3774 """perform the core duty of `revlog.clone` after parameter processing"""
3768 write_debug = None
3775 write_debug = None
3769 if self.delta_config.debug_delta:
3776 if self.delta_config.debug_delta:
3770 write_debug = tr._report
3777 write_debug = tr._report
3771 deltacomputer = deltautil.deltacomputer(
3778 deltacomputer = deltautil.deltacomputer(
3772 destrevlog,
3779 destrevlog,
3773 write_debug=write_debug,
3780 write_debug=write_debug,
3774 )
3781 )
3775 index = self.index
3782 index = self.index
3776 for rev in self:
3783 for rev in self:
3777 entry = index[rev]
3784 entry = index[rev]
3778
3785
3779 # Some classes override linkrev to take filtered revs into
3786 # Some classes override linkrev to take filtered revs into
3780 # account. Use raw entry from index.
3787 # account. Use raw entry from index.
3781 flags = entry[0] & 0xFFFF
3788 flags = entry[0] & 0xFFFF
3782 linkrev = entry[4]
3789 linkrev = entry[4]
3783 p1 = index[entry[5]][7]
3790 p1 = index[entry[5]][7]
3784 p2 = index[entry[6]][7]
3791 p2 = index[entry[6]][7]
3785 node = entry[7]
3792 node = entry[7]
3786
3793
3787 # (Possibly) reuse the delta from the revlog if allowed and
3794 # (Possibly) reuse the delta from the revlog if allowed and
3788 # the revlog chunk is a delta.
3795 # the revlog chunk is a delta.
3789 cachedelta = None
3796 cachedelta = None
3790 rawtext = None
3797 rawtext = None
3791 if deltareuse == self.DELTAREUSEFULLADD:
3798 if deltareuse == self.DELTAREUSEFULLADD:
3792 text = self._revisiondata(rev)
3799 text = self._revisiondata(rev)
3793 sidedata = self.sidedata(rev)
3800 sidedata = self.sidedata(rev)
3794
3801
3795 if sidedata_helpers is not None:
3802 if sidedata_helpers is not None:
3796 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3803 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3797 self, sidedata_helpers, sidedata, rev
3804 self, sidedata_helpers, sidedata, rev
3798 )
3805 )
3799 flags = flags | new_flags[0] & ~new_flags[1]
3806 flags = flags | new_flags[0] & ~new_flags[1]
3800
3807
3801 destrevlog.addrevision(
3808 destrevlog.addrevision(
3802 text,
3809 text,
3803 tr,
3810 tr,
3804 linkrev,
3811 linkrev,
3805 p1,
3812 p1,
3806 p2,
3813 p2,
3807 cachedelta=cachedelta,
3814 cachedelta=cachedelta,
3808 node=node,
3815 node=node,
3809 flags=flags,
3816 flags=flags,
3810 deltacomputer=deltacomputer,
3817 deltacomputer=deltacomputer,
3811 sidedata=sidedata,
3818 sidedata=sidedata,
3812 )
3819 )
3813 else:
3820 else:
3814 if destrevlog.delta_config.lazy_delta:
3821 if destrevlog.delta_config.lazy_delta:
3815 dp = self.deltaparent(rev)
3822 dp = self.deltaparent(rev)
3816 if dp != nullrev:
3823 if dp != nullrev:
3817 cachedelta = (dp, bytes(self._inner._chunk(rev)))
3824 cachedelta = (dp, bytes(self._inner._chunk(rev)))
3818
3825
3819 sidedata = None
3826 sidedata = None
3820 if not cachedelta:
3827 if not cachedelta:
3821 try:
3828 try:
3822 rawtext = self._revisiondata(rev)
3829 rawtext = self._revisiondata(rev)
3823 except error.CensoredNodeError as censored:
3830 except error.CensoredNodeError as censored:
3824 assert flags & REVIDX_ISCENSORED
3831 assert flags & REVIDX_ISCENSORED
3825 rawtext = censored.tombstone
3832 rawtext = censored.tombstone
3826 sidedata = self.sidedata(rev)
3833 sidedata = self.sidedata(rev)
3827 if sidedata is None:
3834 if sidedata is None:
3828 sidedata = self.sidedata(rev)
3835 sidedata = self.sidedata(rev)
3829
3836
3830 if sidedata_helpers is not None:
3837 if sidedata_helpers is not None:
3831 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3838 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3832 self, sidedata_helpers, sidedata, rev
3839 self, sidedata_helpers, sidedata, rev
3833 )
3840 )
3834 flags = flags | new_flags[0] & ~new_flags[1]
3841 flags = flags | new_flags[0] & ~new_flags[1]
3835
3842
3836 destrevlog._addrevision(
3843 destrevlog._addrevision(
3837 node,
3844 node,
3838 rawtext,
3845 rawtext,
3839 tr,
3846 tr,
3840 linkrev,
3847 linkrev,
3841 p1,
3848 p1,
3842 p2,
3849 p2,
3843 flags,
3850 flags,
3844 cachedelta,
3851 cachedelta,
3845 deltacomputer=deltacomputer,
3852 deltacomputer=deltacomputer,
3846 sidedata=sidedata,
3853 sidedata=sidedata,
3847 )
3854 )
3848
3855
3849 if addrevisioncb:
3856 if addrevisioncb:
3850 addrevisioncb(self, rev, node)
3857 addrevisioncb(self, rev, node)
3851
3858
3852 def censorrevision(self, tr, censor_nodes, tombstone=b''):
3859 def censorrevision(self, tr, censor_nodes, tombstone=b''):
3853 if self._format_version == REVLOGV0:
3860 if self._format_version == REVLOGV0:
3854 raise error.RevlogError(
3861 raise error.RevlogError(
3855 _(b'cannot censor with version %d revlogs')
3862 _(b'cannot censor with version %d revlogs')
3856 % self._format_version
3863 % self._format_version
3857 )
3864 )
3858 elif self._format_version == REVLOGV1:
3865 elif self._format_version == REVLOGV1:
3859 rewrite.v1_censor(self, tr, censor_nodes, tombstone)
3866 rewrite.v1_censor(self, tr, censor_nodes, tombstone)
3860 else:
3867 else:
3861 rewrite.v2_censor(self, tr, censor_nodes, tombstone)
3868 rewrite.v2_censor(self, tr, censor_nodes, tombstone)
3862
3869
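# --- editor's note: a hedged usage sketch, not part of revlog.py ---
# censorrevision() dispatches on the revlog format: v0 cannot censor,
# v1 goes through rewrite.v1_censor, later formats through
# rewrite.v2_censor. A caller only needs the nodes to blank out:
def censor_single_node(rlog, tr, node, tombstone=b''):
    rlog.censorrevision(tr, {node}, tombstone=tombstone)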
3863 def verifyintegrity(self, state):
3870 def verifyintegrity(self, state):
3864 """Verifies the integrity of the revlog.
3871 """Verifies the integrity of the revlog.
3865
3872
3866 Yields ``revlogproblem`` instances describing problems that are
3873 Yields ``revlogproblem`` instances describing problems that are
3867 found.
3874 found.
3868 """
3875 """
3869 dd, di = self.checksize()
3876 dd, di = self.checksize()
3870 if dd:
3877 if dd:
3871 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3878 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3872 if di:
3879 if di:
3873 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3880 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3874
3881
3875 version = self._format_version
3882 version = self._format_version
3876
3883
3877 # The verifier tells us what version revlog we should be.
3884 # The verifier tells us what version revlog we should be.
3878 if version != state[b'expectedversion']:
3885 if version != state[b'expectedversion']:
3879 yield revlogproblem(
3886 yield revlogproblem(
3880 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3887 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3881 % (self.display_id, version, state[b'expectedversion'])
3888 % (self.display_id, version, state[b'expectedversion'])
3882 )
3889 )
3883
3890
3884 state[b'skipread'] = set()
3891 state[b'skipread'] = set()
3885 state[b'safe_renamed'] = set()
3892 state[b'safe_renamed'] = set()
3886
3893
3887 for rev in self:
3894 for rev in self:
3888 node = self.node(rev)
3895 node = self.node(rev)
3889
3896
3890 # Verify contents. 4 cases to care about:
3897 # Verify contents. 4 cases to care about:
3891 #
3898 #
3892 # common: the most common case
3899 # common: the most common case
3893 # rename: with a rename
3900 # rename: with a rename
3894 # meta: file content starts with b'\1\n', the metadata
3901 # meta: file content starts with b'\1\n', the metadata
3895 # header defined in filelog.py, but without a rename
3902 # header defined in filelog.py, but without a rename
3896 # ext: content stored externally
3903 # ext: content stored externally
3897 #
3904 #
3898 # More formally, their differences are shown below:
3905 # More formally, their differences are shown below:
3899 #
3906 #
3900 # | common | rename | meta | ext
3907 # | common | rename | meta | ext
3901 # -------------------------------------------------------
3908 # -------------------------------------------------------
3902 # flags() | 0 | 0 | 0 | not 0
3909 # flags() | 0 | 0 | 0 | not 0
3903 # renamed() | False | True | False | ?
3910 # renamed() | False | True | False | ?
3904 # rawtext[0:2]=='\1\n'| False | True | True | ?
3911 # rawtext[0:2]=='\1\n'| False | True | True | ?
3905 #
3912 #
3906 # "rawtext" means the raw text stored in revlog data, which
3913 # "rawtext" means the raw text stored in revlog data, which
3907 # could be retrieved by "rawdata(rev)". "text"
3914 # could be retrieved by "rawdata(rev)". "text"
3908 # mentioned below is "revision(rev)".
3915 # mentioned below is "revision(rev)".
3909 #
3916 #
3910 # There are 3 different lengths stored physically:
3917 # There are 3 different lengths stored physically:
3911 # 1. L1: rawsize, stored in revlog index
3918 # 1. L1: rawsize, stored in revlog index
3912 # 2. L2: len(rawtext), stored in revlog data
3919 # 2. L2: len(rawtext), stored in revlog data
3913 # 3. L3: len(text), stored in revlog data if flags==0, or
3920 # 3. L3: len(text), stored in revlog data if flags==0, or
3914 # possibly somewhere else if flags!=0
3921 # possibly somewhere else if flags!=0
3915 #
3922 #
3916 # L1 should be equal to L2. L3 could be different from them.
3923 # L1 should be equal to L2. L3 could be different from them.
3917 # "text" may or may not affect commit hash depending on flag
3924 # "text" may or may not affect commit hash depending on flag
3918 # processors (see flagutil.addflagprocessor).
3925 # processors (see flagutil.addflagprocessor).
3919 #
3926 #
3920 # | common | rename | meta | ext
3927 # | common | rename | meta | ext
3921 # -------------------------------------------------
3928 # -------------------------------------------------
3922 # rawsize() | L1 | L1 | L1 | L1
3929 # rawsize() | L1 | L1 | L1 | L1
3923 # size() | L1 | L2-LM | L1(*) | L1 (?)
3930 # size() | L1 | L2-LM | L1(*) | L1 (?)
3924 # len(rawtext) | L2 | L2 | L2 | L2
3931 # len(rawtext) | L2 | L2 | L2 | L2
3925 # len(text) | L2 | L2 | L2 | L3
3932 # len(text) | L2 | L2 | L2 | L3
3926 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3933 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3927 #
3934 #
3928 # LM: length of metadata, depending on rawtext
3935 # LM: length of metadata, depending on rawtext
3929 # (*): not ideal, see comment in filelog.size
3936 # (*): not ideal, see comment in filelog.size
3930 # (?): could be "- len(meta)" if the resolved content has
3937 # (?): could be "- len(meta)" if the resolved content has
3931 # rename metadata
3938 # rename metadata
3932 #
3939 #
3933 # Checks needed to be done:
3940 # Checks needed to be done:
3934 # 1. length check: L1 == L2, in all cases.
3941 # 1. length check: L1 == L2, in all cases.
3935 # 2. hash check: depending on flag processor, we may need to
3942 # 2. hash check: depending on flag processor, we may need to
3936 # use either "text" (external), or "rawtext" (in revlog).
3943 # use either "text" (external), or "rawtext" (in revlog).
3937
3944
3938 try:
3945 try:
3939 skipflags = state.get(b'skipflags', 0)
3946 skipflags = state.get(b'skipflags', 0)
3940 if skipflags:
3947 if skipflags:
3941 skipflags &= self.flags(rev)
3948 skipflags &= self.flags(rev)
3942
3949
3943 _verify_revision(self, skipflags, state, node)
3950 _verify_revision(self, skipflags, state, node)
3944
3951
3945 l1 = self.rawsize(rev)
3952 l1 = self.rawsize(rev)
3946 l2 = len(self.rawdata(node))
3953 l2 = len(self.rawdata(node))
3947
3954
3948 if l1 != l2:
3955 if l1 != l2:
3949 yield revlogproblem(
3956 yield revlogproblem(
3950 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3957 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3951 node=node,
3958 node=node,
3952 )
3959 )
3953
3960
3954 except error.CensoredNodeError:
3961 except error.CensoredNodeError:
3955 if state[b'erroroncensored']:
3962 if state[b'erroroncensored']:
3956 yield revlogproblem(
3963 yield revlogproblem(
3957 error=_(b'censored file data'), node=node
3964 error=_(b'censored file data'), node=node
3958 )
3965 )
3959 state[b'skipread'].add(node)
3966 state[b'skipread'].add(node)
3960 except Exception as e:
3967 except Exception as e:
3961 yield revlogproblem(
3968 yield revlogproblem(
3962 error=_(b'unpacking %s: %s')
3969 error=_(b'unpacking %s: %s')
3963 % (short(node), stringutil.forcebytestr(e)),
3970 % (short(node), stringutil.forcebytestr(e)),
3964 node=node,
3971 node=node,
3965 )
3972 )
3966 state[b'skipread'].add(node)
3973 state[b'skipread'].add(node)
3967
3974
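# --- editor's note: a hedged sketch, not part of revlog.py ---
# Draining the generator returned by verifyintegrity() and splitting
# hard errors from warnings. ``state`` is assumed to carry at least
# the keys read above (b'expectedversion', b'erroroncensored', ...).
def collect_integrity_problems(rlog, state):
    errors, warnings = [], []
    for problem in rlog.verifyintegrity(state):
        if problem.error is not None:
            errors.append(problem.error)
        if problem.warning is not None:
            warnings.append(problem.warning)
    return errors, warnings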
3968 def storageinfo(
3975 def storageinfo(
3969 self,
3976 self,
3970 exclusivefiles=False,
3977 exclusivefiles=False,
3971 sharedfiles=False,
3978 sharedfiles=False,
3972 revisionscount=False,
3979 revisionscount=False,
3973 trackedsize=False,
3980 trackedsize=False,
3974 storedsize=False,
3981 storedsize=False,
3975 ):
3982 ):
3976 d = {}
3983 d = {}
3977
3984
3978 if exclusivefiles:
3985 if exclusivefiles:
3979 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3986 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3980 if not self._inline:
3987 if not self._inline:
3981 d[b'exclusivefiles'].append((self.opener, self._datafile))
3988 d[b'exclusivefiles'].append((self.opener, self._datafile))
3982
3989
3983 if sharedfiles:
3990 if sharedfiles:
3984 d[b'sharedfiles'] = []
3991 d[b'sharedfiles'] = []
3985
3992
3986 if revisionscount:
3993 if revisionscount:
3987 d[b'revisionscount'] = len(self)
3994 d[b'revisionscount'] = len(self)
3988
3995
3989 if trackedsize:
3996 if trackedsize:
3990 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3997 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3991
3998
3992 if storedsize:
3999 if storedsize:
3993 d[b'storedsize'] = sum(
4000 d[b'storedsize'] = sum(
3994 self.opener.stat(path).st_size for path in self.files()
4001 self.opener.stat(path).st_size for path in self.files()
3995 )
4002 )
3996
4003
3997 return d
4004 return d
3998
4005
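# --- editor's note: a hedged usage sketch, not part of revlog.py ---
# Requesting a few storage metrics at once; only the keys explicitly
# asked for end up in the returned dictionary.
def storage_summary(rlog):
    info = rlog.storageinfo(
        revisionscount=True, trackedsize=True, storedsize=True
    )
    return (
        info[b'revisionscount'],
        info[b'trackedsize'],
        info[b'storedsize'],
    )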
3999 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
4006 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
4000 if not self.feature_config.has_side_data:
4007 if not self.feature_config.has_side_data:
4001 return
4008 return
4002 # revlog formats with sidedata support do not support inline data
4009 # revlog formats with sidedata support do not support inline data
4003 assert not self._inline
4010 assert not self._inline
4004 if not helpers[1] and not helpers[2]:
4011 if not helpers[1] and not helpers[2]:
4005 # Nothing to generate or remove
4012 # Nothing to generate or remove
4006 return
4013 return
4007
4014
4008 new_entries = []
4015 new_entries = []
4009 # append the new sidedata
4016 # append the new sidedata
4010 with self._writing(transaction):
4017 with self._writing(transaction):
4011 ifh, dfh, sdfh = self._inner._writinghandles
4018 ifh, dfh, sdfh = self._inner._writinghandles
4012 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
4019 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
4013
4020
4014 current_offset = sdfh.tell()
4021 current_offset = sdfh.tell()
4015 for rev in range(startrev, endrev + 1):
4022 for rev in range(startrev, endrev + 1):
4016 entry = self.index[rev]
4023 entry = self.index[rev]
4017 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
4024 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
4018 store=self,
4025 store=self,
4019 sidedata_helpers=helpers,
4026 sidedata_helpers=helpers,
4020 sidedata={},
4027 sidedata={},
4021 rev=rev,
4028 rev=rev,
4022 )
4029 )
4023
4030
4024 serialized_sidedata = sidedatautil.serialize_sidedata(
4031 serialized_sidedata = sidedatautil.serialize_sidedata(
4025 new_sidedata
4032 new_sidedata
4026 )
4033 )
4027
4034
4028 sidedata_compression_mode = COMP_MODE_INLINE
4035 sidedata_compression_mode = COMP_MODE_INLINE
4029 if serialized_sidedata and self.feature_config.has_side_data:
4036 if serialized_sidedata and self.feature_config.has_side_data:
4030 sidedata_compression_mode = COMP_MODE_PLAIN
4037 sidedata_compression_mode = COMP_MODE_PLAIN
4031 h, comp_sidedata = self._inner.compress(serialized_sidedata)
4038 h, comp_sidedata = self._inner.compress(serialized_sidedata)
4032 if (
4039 if (
4033 h != b'u'
4040 h != b'u'
4034 and comp_sidedata[0] != b'\0'
4041 and comp_sidedata[0] != b'\0'
4035 and len(comp_sidedata) < len(serialized_sidedata)
4042 and len(comp_sidedata) < len(serialized_sidedata)
4036 ):
4043 ):
4037 assert not h
4044 assert not h
4038 if (
4045 if (
4039 comp_sidedata[0]
4046 comp_sidedata[0]
4040 == self._docket.default_compression_header
4047 == self._docket.default_compression_header
4041 ):
4048 ):
4042 sidedata_compression_mode = COMP_MODE_DEFAULT
4049 sidedata_compression_mode = COMP_MODE_DEFAULT
4043 serialized_sidedata = comp_sidedata
4050 serialized_sidedata = comp_sidedata
4044 else:
4051 else:
4045 sidedata_compression_mode = COMP_MODE_INLINE
4052 sidedata_compression_mode = COMP_MODE_INLINE
4046 serialized_sidedata = comp_sidedata
4053 serialized_sidedata = comp_sidedata
4047 if entry[8] != 0 or entry[9] != 0:
4054 if entry[8] != 0 or entry[9] != 0:
4048 # rewriting entries that already have sidedata is not
4055 # rewriting entries that already have sidedata is not
4049 # supported yet, because it introduces garbage data in the
4056 # supported yet, because it introduces garbage data in the
4050 # revlog.
4057 # revlog.
4051 msg = b"rewriting existing sidedata is not supported yet"
4058 msg = b"rewriting existing sidedata is not supported yet"
4052 raise error.Abort(msg)
4059 raise error.Abort(msg)
4053
4060
4054 # Apply (potential) flags to add and to remove after running
4061 # Apply (potential) flags to add and to remove after running
4055 # the sidedata helpers
4062 # the sidedata helpers
4056 new_offset_flags = entry[0] | flags[0] & ~flags[1]
4063 new_offset_flags = entry[0] | flags[0] & ~flags[1]
4057 entry_update = (
4064 entry_update = (
4058 current_offset,
4065 current_offset,
4059 len(serialized_sidedata),
4066 len(serialized_sidedata),
4060 new_offset_flags,
4067 new_offset_flags,
4061 sidedata_compression_mode,
4068 sidedata_compression_mode,
4062 )
4069 )
4063
4070
4064 # the sidedata computation might have moved the file cursors around
4071 # the sidedata computation might have moved the file cursors around
4065 sdfh.seek(current_offset, os.SEEK_SET)
4072 sdfh.seek(current_offset, os.SEEK_SET)
4066 sdfh.write(serialized_sidedata)
4073 sdfh.write(serialized_sidedata)
4067 new_entries.append(entry_update)
4074 new_entries.append(entry_update)
4068 current_offset += len(serialized_sidedata)
4075 current_offset += len(serialized_sidedata)
4069 self._docket.sidedata_end = sdfh.tell()
4076 self._docket.sidedata_end = sdfh.tell()
4070
4077
4071 # rewrite the new index entries
4078 # rewrite the new index entries
4072 ifh.seek(startrev * self.index.entry_size)
4079 ifh.seek(startrev * self.index.entry_size)
4073 for i, e in enumerate(new_entries):
4080 for i, e in enumerate(new_entries):
4074 rev = startrev + i
4081 rev = startrev + i
4075 self.index.replace_sidedata_info(rev, *e)
4082 self.index.replace_sidedata_info(rev, *e)
4076 packed = self.index.entry_binary(rev)
4083 packed = self.index.entry_binary(rev)
4077 if rev == 0 and self._docket is None:
4084 if rev == 0 and self._docket is None:
4078 header = self._format_flags | self._format_version
4085 header = self._format_flags | self._format_version
4079 header = self.index.pack_header(header)
4086 header = self.index.pack_header(header)
4080 packed = header + packed
4087 packed = header + packed
4081 ifh.write(packed)
4088 ifh.write(packed)
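# --- editor's note: a hedged illustration, not part of revlog.py ---
# The compression-mode decision made above for a sidedata blob, pulled
# out as a standalone function: keep the compressed form only when the
# engine actually compressed (header is not b'u'), the first byte does
# not collide with the reserved b'\0' marker, and there is a real size
# win. ``compress`` stands in for self._inner.compress and
# ``default_header`` for self._docket.default_compression_header; the
# modes are returned as the COMP_MODE_* names for readability.
def pick_sidedata_mode(raw, compress, default_header):
    h, comp = compress(raw)
    if h != b'u' and comp[0:1] != b'\0' and len(comp) < len(raw):
        if comp[0:1] == default_header:
            return 'COMP_MODE_DEFAULT', comp
        return 'COMP_MODE_INLINE', comp
    return 'COMP_MODE_PLAIN', raw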
@@ -1,307 +1,303
1 ======================================================
1 ======================================================
2 Test operation on repository with an inlined changelog
2 Test operation on repository with an inlined changelog
3 ======================================================
3 ======================================================
4
4
5 Inlined revlogs have been a bag of complexity for a long time, and their
5 Inlined revlogs have been a bag of complexity for a long time, and their
6 combination with the special transaction logic on the changelog has long been
6 combination with the special transaction logic on the changelog has long been
7 a source of bugs poorly covered by the test suites.
7 a source of bugs poorly covered by the test suites.
8
8
9 We stopped using inlined revlogs for the changelog in a93e52f0b6ff,
9 We stopped using inlined revlogs for the changelog in a93e52f0b6ff,
10 upgrading legacy inlined versions as soon as possible when we see them.
10 upgrading legacy inlined versions as soon as possible when we see them.
11 However, since this Mercurial no longer produces such inlined changelogs, that
11 However, since this Mercurial no longer produces such inlined changelogs, that
12 case is very poorly covered in the test suites. This test file aims at covering these cases.
12 case is very poorly covered in the test suites. This test file aims at covering these cases.
13
13
14 Double checking test data
14 Double checking test data
15 =========================
15 =========================
16
16
17 We should have a repository around
17 We should have a repository around
18
18
19 $ mkdir sanity-check
19 $ mkdir sanity-check
20 $ cd sanity-check
20 $ cd sanity-check
21 $ tar xf $TESTDIR/bundles/inlined-changelog.tar
21 $ tar xf $TESTDIR/bundles/inlined-changelog.tar
22 $ cd inlined-changelog
22 $ cd inlined-changelog
23 $ hg root
23 $ hg root
24 $TESTTMP/sanity-check/inlined-changelog
24 $TESTTMP/sanity-check/inlined-changelog
25
25
26 The repository should not be corrupted initially
26 The repository should not be corrupted initially
27
27
28 $ hg verify
28 $ hg verify
29 checking changesets
29 checking changesets
30 checking manifests
30 checking manifests
31 crosschecking files in changesets and manifests
31 crosschecking files in changesets and manifests
32 checking files
32 checking files
33 checking dirstate
33 checking dirstate
34 checked 1 changesets with 1 changes to 1 files
34 checked 1 changesets with 1 changes to 1 files
35
35
36 The changelog of that repository MUST be inlined
36 The changelog of that repository MUST be inlined
37
37
38 $ hg debugrevlog -c | grep -E '^flags\b'
38 $ hg debugrevlog -c | grep -E '^flags\b'
39 flags : inline
39 flags : inline
40
40
41 Touching that repository MUST split that inlined changelog
41 Touching that repository MUST split that inlined changelog
42
42
43 $ hg branch foo --quiet
43 $ hg branch foo --quiet
44 $ hg commit -m foo --quiet
44 $ hg commit -m foo --quiet
45 $ hg debugrevlog -c | grep -E '^flags\b'
45 $ hg debugrevlog -c | grep -E '^flags\b'
46 flags : (none)
46 flags : (none)
47
47
48 $ cd ../..
48 $ cd ../..
49
49
50 Test doing a simple commit
50 Test doing a simple commit
51 ==========================
51 ==========================
52
52
53 Simple commit
53 Simple commit
54 -------------
54 -------------
55
55
56 $ mkdir simple-commit
56 $ mkdir simple-commit
57 $ cd simple-commit
57 $ cd simple-commit
58 $ tar xf $TESTDIR/bundles/inlined-changelog.tar
58 $ tar xf $TESTDIR/bundles/inlined-changelog.tar
59 $ cd inlined-changelog
59 $ cd inlined-changelog
60 $ hg up --quiet
60 $ hg up --quiet
61 $ hg log -GT '[{rev}] {desc}\n'
61 $ hg log -GT '[{rev}] {desc}\n'
62 @ [0] first commit
62 @ [0] first commit
63
63
64 $ echo b > b
64 $ echo b > b
65 $ hg add b
65 $ hg add b
66 $ hg commit -m "second changeset"
66 $ hg commit -m "second changeset"
67 $ hg verify
67 $ hg verify
68 checking changesets
68 checking changesets
69 checking manifests
69 checking manifests
70 crosschecking files in changesets and manifests
70 crosschecking files in changesets and manifests
71 checking files
71 checking files
72 checking dirstate
72 checking dirstate
73 checked 2 changesets with 2 changes to 2 files
73 checked 2 changesets with 2 changes to 2 files
74 $ hg log -GT '[{rev}] {desc}\n'
74 $ hg log -GT '[{rev}] {desc}\n'
75 @ [1] second changeset
75 @ [1] second changeset
76 |
76 |
77 o [0] first commit
77 o [0] first commit
78
78
79 $ cd ../..
79 $ cd ../..
80
80
81 Simple commit with a pretxn hook configured
81 Simple commit with a pretxn hook configured
82 -------------------------------------------
82 -------------------------------------------
83
83
84 Before 6.7.3 this used to delete the changelog index
84 Before 6.7.3 this used to delete the changelog index
85
85
86 $ mkdir pretxnclose-commit
86 $ mkdir pretxnclose-commit
87 $ cd pretxnclose-commit
87 $ cd pretxnclose-commit
88 $ tar xf $TESTDIR/bundles/inlined-changelog.tar
88 $ tar xf $TESTDIR/bundles/inlined-changelog.tar
89 $ cat >> inlined-changelog/.hg/hgrc <<EOF
89 $ cat >> inlined-changelog/.hg/hgrc <<EOF
90 > [hooks]
90 > [hooks]
91 > pretxnclose=hg log -r tip -T "pre-txn tip rev: {rev}\n"
91 > pretxnclose=hg log -r tip -T "pre-txn tip rev: {rev}\n"
92 > EOF
92 > EOF
93 $ cd inlined-changelog
93 $ cd inlined-changelog
94 $ hg up --quiet
94 $ hg up --quiet
95 $ hg log -GT '[{rev}] {desc}\n'
95 $ hg log -GT '[{rev}] {desc}\n'
96 @ [0] first commit
96 @ [0] first commit
97
97
98 $ echo b > b
98 $ echo b > b
99 $ hg add b
99 $ hg add b
100 $ hg commit -m "second changeset"
100 $ hg commit -m "second changeset"
101 pre-txn tip rev: 1 (missing-correct-output !)
101 pre-txn tip rev: 1
102 warning: ignoring unknown working parent 11b63e930bf2! (known-bad-output !)
103 pre-txn tip rev: 0 (known-bad-output !)
104 $ hg verify
102 $ hg verify
105 checking changesets
103 checking changesets
106 checking manifests
104 checking manifests
107 crosschecking files in changesets and manifests
105 crosschecking files in changesets and manifests
108 checking files
106 checking files
109 checking dirstate
107 checking dirstate
110 checked 2 changesets with 2 changes to 2 files
108 checked 2 changesets with 2 changes to 2 files
111 $ hg log -GT '[{rev}] {desc}\n'
109 $ hg log -GT '[{rev}] {desc}\n'
112 @ [1] second changeset
110 @ [1] second changeset
113 |
111 |
114 o [0] first commit
112 o [0] first commit
115
113
116 $ cd ../..
114 $ cd ../..
117
115
118 Test pushing to a repository with an inlined changelog
116 Test pushing to a repository with an inlined changelog
119 =======================================================
117 =======================================================
120
118
121 Simple local push
119 Simple local push
122 -----------------
120 -----------------
123
121
124 $ mkdir simple-local-push
122 $ mkdir simple-local-push
125 $ cd simple-local-push
123 $ cd simple-local-push
126 $ tar xf $TESTDIR/bundles/inlined-changelog.tar
124 $ tar xf $TESTDIR/bundles/inlined-changelog.tar
127 $ hg log -R inlined-changelog -T '[{rev}] {desc}\n'
125 $ hg log -R inlined-changelog -T '[{rev}] {desc}\n'
128 [0] first commit
126 [0] first commit
129
127
130 $ hg clone --pull inlined-changelog client
128 $ hg clone --pull inlined-changelog client
131 requesting all changes
129 requesting all changes
132 adding changesets
130 adding changesets
133 adding manifests
131 adding manifests
134 adding file changes
132 adding file changes
135 added 1 changesets with 1 changes to 1 files
133 added 1 changesets with 1 changes to 1 files
136 new changesets 827f11bfd362
134 new changesets 827f11bfd362
137 updating to branch default
135 updating to branch default
138 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
136 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
139 $ cd client
137 $ cd client
140 $ echo b > b
138 $ echo b > b
141 $ hg add b
139 $ hg add b
142 $ hg commit -m "second changeset"
140 $ hg commit -m "second changeset"
143 $ hg push
141 $ hg push
144 pushing to $TESTTMP/*/inlined-changelog (glob)
142 pushing to $TESTTMP/*/inlined-changelog (glob)
145 searching for changes
143 searching for changes
146 adding changesets
144 adding changesets
147 adding manifests
145 adding manifests
148 adding file changes
146 adding file changes
149 added 1 changesets with 1 changes to 1 files
147 added 1 changesets with 1 changes to 1 files
150 $ cd ..
148 $ cd ..
151
149
152 $ hg verify -R inlined-changelog
150 $ hg verify -R inlined-changelog
153 checking changesets
151 checking changesets
154 checking manifests
152 checking manifests
155 crosschecking files in changesets and manifests
153 crosschecking files in changesets and manifests
156 checking files
154 checking files
157 checking dirstate
155 checking dirstate
158 checked 2 changesets with 2 changes to 2 files
156 checked 2 changesets with 2 changes to 2 files
159 $ hg log -R inlined-changelog -T '[{rev}] {desc}\n'
157 $ hg log -R inlined-changelog -T '[{rev}] {desc}\n'
160 [1] second changeset
158 [1] second changeset
161 [0] first commit
159 [0] first commit
162 $ cd ..
160 $ cd ..
163
161
164 Simple local push with a pretxnchangegroup hook
162 Simple local push with a pretxnchangegroup hook
165 -----------------------------------------------
163 -----------------------------------------------
166
164
167 Before 6.7.3 this used to delete the server changelog
165 Before 6.7.3 this used to delete the server changelog
168
166
169 $ mkdir pretxnchangegroup-local-push
167 $ mkdir pretxnchangegroup-local-push
170 $ cd pretxnchangegroup-local-push
168 $ cd pretxnchangegroup-local-push
171 $ tar xf $TESTDIR/bundles/inlined-changelog.tar
169 $ tar xf $TESTDIR/bundles/inlined-changelog.tar
172 $ cat >> inlined-changelog/.hg/hgrc <<EOF
170 $ cat >> inlined-changelog/.hg/hgrc <<EOF
173 > [hooks]
171 > [hooks]
174 > pretxnchangegroup=hg log -r tip -T "pre-txn tip rev: {rev}\n"
172 > pretxnchangegroup=hg log -r tip -T "pre-txn tip rev: {rev}\n"
175 > EOF
173 > EOF
176 $ hg log -R inlined-changelog -T '[{rev}] {desc}\n'
174 $ hg log -R inlined-changelog -T '[{rev}] {desc}\n'
177 [0] first commit
175 [0] first commit
178
176
179 $ hg clone --pull inlined-changelog client
177 $ hg clone --pull inlined-changelog client
180 requesting all changes
178 requesting all changes
181 adding changesets
179 adding changesets
182 adding manifests
180 adding manifests
183 adding file changes
181 adding file changes
184 added 1 changesets with 1 changes to 1 files
182 added 1 changesets with 1 changes to 1 files
185 new changesets 827f11bfd362
183 new changesets 827f11bfd362
186 updating to branch default
184 updating to branch default
187 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
185 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
188 $ cd client
186 $ cd client
189 $ echo b > b
187 $ echo b > b
190 $ hg add b
188 $ hg add b
191 $ hg commit -m "second changeset"
189 $ hg commit -m "second changeset"
192 $ hg push
190 $ hg push
193 pushing to $TESTTMP/*/inlined-changelog (glob)
191 pushing to $TESTTMP/*/inlined-changelog (glob)
194 searching for changes
192 searching for changes
195 adding changesets
193 adding changesets
196 adding manifests
194 adding manifests
197 adding file changes
195 adding file changes
198 pre-txn tip rev: 1 (missing-correct-output !)
196 pre-txn tip rev: 1
199 pre-txn tip rev: 0 (known-bad-output !)
200 added 1 changesets with 1 changes to 1 files
197 added 1 changesets with 1 changes to 1 files
201 $ cd ..
198 $ cd ..
202
199
203 $ hg verify -R inlined-changelog
200 $ hg verify -R inlined-changelog
204 checking changesets
201 checking changesets
205 checking manifests
202 checking manifests
206 crosschecking files in changesets and manifests
203 crosschecking files in changesets and manifests
207 checking files
204 checking files
208 checking dirstate
205 checking dirstate
209 checked 2 changesets with 2 changes to 2 files
206 checked 2 changesets with 2 changes to 2 files
210 $ hg log -R inlined-changelog -T '[{rev}] {desc}\n'
207 $ hg log -R inlined-changelog -T '[{rev}] {desc}\n'
211 [1] second changeset
208 [1] second changeset
212 [0] first commit
209 [0] first commit
213 $ cd ..
210 $ cd ..
214
211
215 Simple ssh push
212 Simple ssh push
216 -----------------
213 -----------------
217
214
218 $ mkdir simple-ssh-push
215 $ mkdir simple-ssh-push
219 $ cd simple-ssh-push
216 $ cd simple-ssh-push
220 $ tar xf $TESTDIR/bundles/inlined-changelog.tar
217 $ tar xf $TESTDIR/bundles/inlined-changelog.tar
221 $ hg log -R inlined-changelog -T '[{rev}] {desc}\n'
218 $ hg log -R inlined-changelog -T '[{rev}] {desc}\n'
222 [0] first commit
219 [0] first commit
223
220
224 $ hg clone ssh://user@dummy/"`pwd`"/inlined-changelog client
221 $ hg clone ssh://user@dummy/"`pwd`"/inlined-changelog client
225 requesting all changes
222 requesting all changes
226 adding changesets
223 adding changesets
227 adding manifests
224 adding manifests
228 adding file changes
225 adding file changes
229 added 1 changesets with 1 changes to 1 files
226 added 1 changesets with 1 changes to 1 files
230 new changesets 827f11bfd362
227 new changesets 827f11bfd362
231 updating to branch default
228 updating to branch default
232 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
229 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
233 $ cd client
230 $ cd client
234 $ echo b > b
231 $ echo b > b
235 $ hg add b
232 $ hg add b
236 $ hg commit -m "second changeset"
233 $ hg commit -m "second changeset"
237 $ hg push
234 $ hg push
238 pushing to ssh://user@dummy/$TESTTMP/simple-ssh-push/inlined-changelog
235 pushing to ssh://user@dummy/$TESTTMP/simple-ssh-push/inlined-changelog
239 searching for changes
236 searching for changes
240 remote: adding changesets
237 remote: adding changesets
241 remote: adding manifests
238 remote: adding manifests
242 remote: adding file changes
239 remote: adding file changes
243 remote: added 1 changesets with 1 changes to 1 files
240 remote: added 1 changesets with 1 changes to 1 files
244 $ cd ..
241 $ cd ..
245
242
246 $ hg verify -R inlined-changelog
243 $ hg verify -R inlined-changelog
247 checking changesets
244 checking changesets
248 checking manifests
245 checking manifests
249 crosschecking files in changesets and manifests
246 crosschecking files in changesets and manifests
250 checking files
247 checking files
251 checking dirstate
248 checking dirstate
252 checked 2 changesets with 2 changes to 2 files
249 checked 2 changesets with 2 changes to 2 files
253 $ hg log -R inlined-changelog -T '[{rev}] {desc}\n'
250 $ hg log -R inlined-changelog -T '[{rev}] {desc}\n'
254 [1] second changeset
251 [1] second changeset
255 [0] first commit
252 [0] first commit
256 $ cd ..
253 $ cd ..
257
254
258 Simple ssh push with a pretxnchangegroup hook
255 Simple ssh push with a pretxnchangegroup hook
259 -----------------------------------------------
256 -----------------------------------------------
260
257
261 Before 6.7.3 this used to delete the server changelog
258 Before 6.7.3 this used to delete the server changelog
262
259
263 $ mkdir pretxnchangegroup-ssh-push
260 $ mkdir pretxnchangegroup-ssh-push
264 $ cd pretxnchangegroup-ssh-push
261 $ cd pretxnchangegroup-ssh-push
265 $ tar xf $TESTDIR/bundles/inlined-changelog.tar
262 $ tar xf $TESTDIR/bundles/inlined-changelog.tar
266 $ cat >> inlined-changelog/.hg/hgrc <<EOF
263 $ cat >> inlined-changelog/.hg/hgrc <<EOF
267 > [hooks]
264 > [hooks]
268 > pretxnchangegroup=hg log -r tip -T "pre-txn tip rev: {rev}\n"
265 > pretxnchangegroup=hg log -r tip -T "pre-txn tip rev: {rev}\n"
269 > EOF
266 > EOF
270 $ hg log -R inlined-changelog -T '[{rev}] {desc}\n'
267 $ hg log -R inlined-changelog -T '[{rev}] {desc}\n'
271 [0] first commit
268 [0] first commit
272
269
273 $ hg clone ssh://user@dummy/"`pwd`"/inlined-changelog client
270 $ hg clone ssh://user@dummy/"`pwd`"/inlined-changelog client
274 requesting all changes
271 requesting all changes
275 adding changesets
272 adding changesets
276 adding manifests
273 adding manifests
277 adding file changes
274 adding file changes
278 added 1 changesets with 1 changes to 1 files
275 added 1 changesets with 1 changes to 1 files
279 new changesets 827f11bfd362
276 new changesets 827f11bfd362
280 updating to branch default
277 updating to branch default
281 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
278 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
282 $ cd client
279 $ cd client
283 $ echo b > b
280 $ echo b > b
284 $ hg add b
281 $ hg add b
285 $ hg commit -m "second changeset"
282 $ hg commit -m "second changeset"
286 $ hg push
283 $ hg push
287 pushing to ssh://user@dummy/$TESTTMP/pretxnchangegroup-ssh-push/inlined-changelog
284 pushing to ssh://user@dummy/$TESTTMP/pretxnchangegroup-ssh-push/inlined-changelog
288 searching for changes
285 searching for changes
289 remote: adding changesets
286 remote: adding changesets
290 remote: adding manifests
287 remote: adding manifests
291 remote: adding file changes
288 remote: adding file changes
292 remote: pre-txn tip rev: 1 (missing-correct-output !)
289 remote: pre-txn tip rev: 1
293 remote: pre-txn tip rev: 0 (known-bad-output !)
294 remote: added 1 changesets with 1 changes to 1 files
290 remote: added 1 changesets with 1 changes to 1 files
295 $ cd ..
291 $ cd ..
296
292
297 $ hg verify -R inlined-changelog
293 $ hg verify -R inlined-changelog
298 checking changesets
294 checking changesets
299 checking manifests
295 checking manifests
300 crosschecking files in changesets and manifests
296 crosschecking files in changesets and manifests
301 checking files
297 checking files
302 checking dirstate
298 checking dirstate
303 checked 2 changesets with 2 changes to 2 files
299 checked 2 changesets with 2 changes to 2 files
304 $ hg log -R inlined-changelog -T '[{rev}] {desc}\n'
300 $ hg log -R inlined-changelog -T '[{rev}] {desc}\n'
305 [1] second changeset
301 [1] second changeset
306 [0] first commit
302 [0] first commit
307 $ cd ..
303 $ cd ..