revlog: move the splitting-inline-revlog logic inside the inner object...
marmoute
r51983:de6a8cc2 default
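
The change moves the inline-splitting logic from the revlog object into its
inner object. As a hedged sketch of the caller side (illustration only: every
name below except _InnerRevlog.split_inline(tr, header,
new_index_file_path=None) is an assumption, not taken from this patch), the
new entry point is meant to be driven roughly like this:

    def _enforce_inline_size(rl, tr):
        # rl: a revlog whose data is still embedded in its index (inline);
        # tr: the active transaction, so the rewritten files are tracked.
        if not rl._inner.inline:
            return
        # clear the inline flag from the header word before the index is
        # rewritten, then let the inner object split itself into .i/.d files
        rl._format_flags &= ~FLAG_INLINE_DATA
        header = rl._format_flags | rl._format_version
        rl._indexfile = rl._inner.split_inline(tr, header)

split_inline() streams every revision's raw segment into the new data file,
rewrites the index without the inline data, and returns the path of the index
file it ended up writing.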
@@ -1,3917 +1,3929 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import io
import os
import struct
import weakref
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


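# Hedged illustration (not part of the original module): index loading
# roughly amounts to picking one of the parse_index_* helpers above based on
# the on-disk header, e.g.:
#
#     data = opener(index_file).read()
#     inline = revlog.is_inline_index(data[:INDEX_HEADER.size])
#     index, chunk_cache = parse_index_v1(data, inline)
#
# where ``index`` gives random access to entries and ``chunk_cache`` seeds
# the raw-data cache for inline revlogs. The exact driver lives in
# revlog._loadindex(), which is outside this hunk.
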
if hasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'


class _Config:
    def copy(self):
        return self.__class__(**self.__dict__)


@attr.s()
class FeatureConfig(_Config):
    """Hold configuration values about the available revlog features"""

    # the default compression engine
    compression_engine = attr.ib(default=b'zlib')
    # compression engines options
    compression_engine_options = attr.ib(default=attr.Factory(dict))

    # can we use censor on this revlog
    censorable = attr.ib(default=False)
    # does this revlog use the "side data" feature
    has_side_data = attr.ib(default=False)
    # might remove rank configuration once the computation has no impact
    compute_rank = attr.ib(default=False)
    # parent order is supposed to be semantically irrelevant, so we
    # normally resort parents to ensure that the first parent is non-null,
    # if there is a non-null parent at all.
    # filelog abuses the parent order as a flag to mark some instances of
    # meta-encoded files, so allow it to disable this behavior.
    canonical_parent_order = attr.ib(default=False)
    # can ellipsis commits be used
    enable_ellipsis = attr.ib(default=False)

    def copy(self):
        new = super().copy()
        new.compression_engine_options = self.compression_engine_options.copy()
        return new


@attr.s()
class DataConfig(_Config):
    """Hold configuration values about how the revlog data are read"""

    # should we try to open the "pending" version of the revlog
    try_pending = attr.ib(default=False)
    # should we try to open the "split" version of the revlog
    try_split = attr.ib(default=False)
    # When True, indexfile should be opened with checkambig=True at writing,
    # to avoid file stat ambiguity.
    check_ambig = attr.ib(default=False)

    # If true, use mmap instead of reading to deal with large indexes
    mmap_large_index = attr.ib(default=False)
    # how much data is considered large
    mmap_index_threshold = attr.ib(default=None)
    # How much data to read and cache into the raw revlog data cache.
    chunk_cache_size = attr.ib(default=65536)

    # Allow sparse reading of the revlog data
    with_sparse_read = attr.ib(default=False)
    # minimal density of a sparse read chunk
    sr_density_threshold = attr.ib(default=0.50)
    # minimal size of the data we skip when performing sparse read
    sr_min_gap_size = attr.ib(default=262144)

    # are deltas encoded against arbitrary bases.
    generaldelta = attr.ib(default=False)


@attr.s()
class DeltaConfig(_Config):
    """Hold configuration values about how new deltas are computed

    Some attributes are duplicated from DataConfig to help having each object
    self contained.
    """

    # can deltas be encoded against arbitrary bases.
    general_delta = attr.ib(default=False)
    # Allow sparse writing of the revlog data
    sparse_revlog = attr.ib(default=False)
    # maximum length of a delta chain
    max_chain_len = attr.ib(default=None)
    # Maximum distance between a delta chain's base start and end
    max_deltachain_span = attr.ib(default=-1)
    # If `upper_bound_comp` is not None, this is the expected maximal gain from
    # compression for the data content.
    upper_bound_comp = attr.ib(default=None)
    # Should we try a delta against both parents
    delta_both_parents = attr.ib(default=True)
    # Test delta base candidate groups by chunks of this maximal size.
    candidate_group_chunk_size = attr.ib(default=0)
    # Should we display debug information about delta computation
    debug_delta = attr.ib(default=False)
    # trust incoming deltas by default
    lazy_delta = attr.ib(default=True)
    # trust the base of incoming deltas by default
    lazy_delta_base = attr.ib(default=False)


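# Hedged usage sketch (illustration, not from the original file): because
# ``_Config.copy`` rebuilds the instance via
# ``self.__class__(**self.__dict__)``, per-revlog tweaks can be applied to a
# copy without mutating a shared config object:
#
#     base = DeltaConfig(general_delta=True, sparse_revlog=True)
#     tuned = base.copy()
#     tuned.max_chain_len = 1000  # only affects the copy
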
class _InnerRevlog:
    """An inner layer of the revlog object

    That layer exists to be able to delegate some operations to Rust; its
    boundaries are arbitrary and based on what we can delegate to Rust.
    """

    def __init__(
        self,
        opener,
        index,
        index_file,
        data_file,
        sidedata_file,
        inline,
        data_config,
        chunk_cache,
    ):
        self.opener = opener
        self.index = index

        self.__index_file = index_file
        self.data_file = data_file
        self.sidedata_file = sidedata_file
        self.inline = inline
        self.data_config = data_config

        # index

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self.index_file if self.inline else self.data_file),
            self.data_config.chunk_cache_size,
            chunk_cache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self.sidedata_file,
            self.data_config.chunk_cache_size,
        )

    @property
    def index_file(self):
        return self.__index_file

    @index_file.setter
    def index_file(self, new_index_file):
        self.__index_file = new_index_file
        if self.inline:
            self._segmentfile.filename = new_index_file

    # Derived from index values.

    def start(self, rev):
        """the offset of the data chunk for this revision"""
        return int(self.index[rev][0] >> 16)

    def length(self, rev):
        """the length of the data chunk for this revision"""
        return self.index[rev][1]

    def end(self, rev):
        """the end of the data chunk for this revision"""
        return self.start(rev) + self.length(rev)

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        if len(self.index) == 0:
            yield  # nothing to be read
        else:
            with self._segmentfile.reading():
                with self._segmentfile_sidedata.reading():
                    yield

    @property
    def is_writing(self):
        """True if a writing context is open"""
        return self._writinghandles is not None

    @contextlib.contextmanager
    def writing(self, transaction, data_end=None, sidedata_end=None):
        """Open the revlog files for writing

        Adding content to a revlog should be done within such a context.
        """
        if self.is_writing:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self.index)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self.inline:
                    try:
                        dfh = self.opener(self.data_file, mode=b"r+")
                        if data_end is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self.opener(self.data_file, mode=b"w+")
                        transaction.add(self.data_file, dsize)
                if self.sidedata_file is not None:
                    assert sidedata_end is not None
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self.sidedata_file, mode=b"r+")
                        dfh.seek(sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self.sidedata_file, mode=b"w+")
                        transaction.add(self.sidedata_file, sidedata_end)

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self.inline:
                    transaction.add(self.index_file, dsize + isize)
                else:
                    transaction.add(self.index_file, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self.inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing referents to
                # potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

    def __index_write_fp(self, index_end=None):
        """internal method to open the index file for writing

        You should not use this directly and should use `_writing` instead
        """
        try:
            f = self.opener(
                self.index_file,
                mode=b"r+",
                checkambig=self.data_config.check_ambig,
            )
            if index_end is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(index_end, os.SEEK_SET)
            return f
        except FileNotFoundError:
            return self.opener(
                self.index_file,
                mode=b"w+",
                checkambig=self.data_config.check_ambig,
            )

    def __index_new_fp(self):
        """internal method to create a new index file for writing

        You should not use this unless you are upgrading from inline revlog
        """
        return self.opener(
            self.index_file,
            mode=b"w",
            checkambig=self.data_config.check_ambig,
            atomictemp=True,
        )

    def split_inline(self, tr, header, new_index_file_path=None):
        """split the data of an inline revlog into an index and a data file"""
        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with the sidedata writing handle as it is only
            # relevant with revlog-v2, which is never inline, so it does not
            # reach this code.

        new_dfh = self.opener(self.data_file, mode=b"w+")
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self.reading():
                for r in range(len(self.index)):
                    new_dfh.write(self.get_segment_for_revs(r, r)[1])
                new_dfh.flush()

            if new_index_file_path is not None:
                self.index_file = new_index_file_path
            with self.__index_new_fp() as fp:
                self.inline = False
                for i in range(len(self.index)):
                    e = self.index.entry_binary(i)
                    if i == 0:
                        packed_header = self.index.pack_header(header)
                        e = packed_header + e
                    fp.write(e)

                # If we don't use side-write, the temp file replaces the real
                # index when we exit the context manager

            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self.data_file,
                self.data_config.chunk_cache_size,
            )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with the sidedata writing handle as it is
                # only relevant with revlog-v2, which is never inline, so it
                # does not reach this code.
        finally:
            if new_dfh is not None:
                new_dfh.close()
        return self.index_file

    def get_segment_for_revs(self, startrev, endrev):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.

        API: we should consider making this a private part of the InnerRevlog
        at some point.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self.inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length)


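# Illustrative note (not part of the original file): for an inline revlog the
# index records and data chunks share one file, which is why
# get_segment_for_revs() shifts raw data offsets past the index entries.
# Assuming entry_size = 64 and that rev 3's chunk starts at data offset 100
# with length 20, the physical byte range read is:
#
#     start = 100 + (3 + 1) * 64       # skip index entries for revs 0..3 -> 356
#     end = (100 + 20) + (3 + 1) * 64  # -> 376
#     length = end - start             # -> 20 bytes, as expected
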
class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

    _flagserrorclass = error.RevlogError

    @staticmethod
    def is_inline_index(header_bytes):
        """Determine if a revlog is inline from the initial bytes of the index"""
        header = INDEX_HEADER.unpack(header_bytes)[0]

        _format_flags = header & ~0xFFFF
        _format_version = header & 0xFFFF

        features = FEATURES_BY_VERSION[_format_version]
        return features[b'inline'](_format_flags)

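    # Hedged sketch (illustration, not from the original file): callers are
    # expected to peek at the first header word to decide how to open the
    # revlog, roughly:
    #
    #     with opener(index_file) as fp:
    #         header_bytes = fp.read(INDEX_HEADER.size)  # assumed 4 bytes
    #     inline = revlog.is_inline_index(header_bytes)
    #
    # INDEX_HEADER is assumed to be a struct.Struct covering the leading
    # format-flags/version word; FEATURES_BY_VERSION then supplies the
    # per-version inline predicate used above.
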
    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        try_split=False,
        canonical_parent_order=True,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must reliably be set by normal
        code, but that test, debug, or performance-measurement code might
        not set this to an accurate value.
        """

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self._try_split = try_split
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        if b'feature-config' in self.opener.options:
            self.feature_config = self.opener.options[b'feature-config'].copy()
        else:
            self.feature_config = FeatureConfig()
        self.feature_config.censorable = censorable
        self.feature_config.canonical_parent_order = canonical_parent_order
        if b'data-config' in self.opener.options:
            self.data_config = self.opener.options[b'data-config'].copy()
        else:
            self.data_config = DataConfig()
        self.data_config.check_ambig = checkambig
        self.data_config.mmap_large_index = mmaplargeindex
        if b'delta-config' in self.opener.options:
            self.delta_config = self.opener.options[b'delta-config'].copy()
        else:
            self.delta_config = DeltaConfig()
        self.delta_config.upper_bound_comp = upperboundcomp

        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)

        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}

        # other optional features

        # Make a copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)
        # prevent nesting of addgroup
        self._adding_group = None

        chunk_cache = self._loadindex()
        self._load_inner(chunk_cache)

        self._concurrencychecker = concurrencychecker

    @property
    def _generaldelta(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
        )
        return self.delta_config.general_delta

    @property
    def _checkambig(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
        )
        return self.data_config.check_ambig

    @property
    def _mmaplargeindex(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
        )
        return self.data_config.mmap_large_index

    @property
    def _censorable(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
        )
        return self.feature_config.censorable

    @property
    def _chunkcachesize(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
        )
        return self.data_config.chunk_cache_size

    @property
    def _maxchainlen(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
        )
        return self.delta_config.max_chain_len

    @property
    def _deltabothparents(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
        )
        return self.delta_config.delta_both_parents

    @property
    def _candidate_group_chunk_size(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.candidate_group_chunk_size",
            b"6.6",
            stacklevel=2,
        )
        return self.delta_config.candidate_group_chunk_size

    @property
    def _debug_delta(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
        )
        return self.delta_config.debug_delta

    @property
    def _compengine(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.compression_engine",
            b"6.6",
            stacklevel=2,
        )
        return self.feature_config.compression_engine

    @property
    def upperboundcomp(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.upper_bound_comp",
            b"6.6",
            stacklevel=2,
        )
        return self.delta_config.upper_bound_comp

    @property
    def _compengineopts(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.compression_engine_options",
            b"6.6",
            stacklevel=2,
        )
        return self.feature_config.compression_engine_options

    @property
    def _maxdeltachainspan(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
        )
        return self.delta_config.max_deltachain_span

    @property
    def _withsparseread(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
        )
        return self.data_config.with_sparse_read

    @property
    def _sparserevlog(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
        )
        return self.delta_config.sparse_revlog

    @property
    def hassidedata(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
        )
        return self.feature_config.has_side_data

    @property
    def _srdensitythreshold(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.sr_density_threshold",
            b"6.6",
            stacklevel=2,
        )
        return self.data_config.sr_density_threshold

    @property
    def _srmingapsize(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
        )
        return self.data_config.sr_min_gap_size

    @property
    def _compute_rank(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
        )
        return self.feature_config.compute_rank

    @property
    def canonical_parent_order(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.canonical_parent_order",
            b"6.6",
            stacklevel=2,
        )
        return self.feature_config.canonical_parent_order

    @property
    def _lazydelta(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
        )
        return self.delta_config.lazy_delta

    @property
    def _lazydeltabase(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
        )
        return self.delta_config.lazy_delta_base

    def _init_opts(self):
        """process options (from above/config) to set up the associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
            compute_rank = opts.get(b'changelogv2.compute-rank', True)
            self.feature_config.compute_rank = compute_rank
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        mmapindexthreshold = None
        if self.data_config.mmap_large_index:
            mmapindexthreshold = self.data_config.mmap_index_threshold
        if self.feature_config.enable_ellipsis:
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        chunk_cache_size = self.data_config.chunk_cache_size
        if chunk_cache_size <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % chunk_cache_size
            )
        elif chunk_cache_size & (chunk_cache_size - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % chunk_cache_size
            )
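        # note: a power of two has exactly one bit set, so for such values
        # `n & (n - 1)` is 0, e.g. 65536 & 65535 == 0 (accepted), while a
        # non-power like 65537 gives 65537 & 65536 == 65536 (rejected above).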
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file's content, with or without mmap

        If the file is missing, return the empty string"""
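        # For example, with mmap_threshold=65536 a 1 MiB index file is
        # mapped into memory via mmap, while a 4 KiB one is plainly read();
        # passing mmap_threshold=None always uses a plain read.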
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''

    def get_streams(self, max_linkrev, force_inline=False):
        """return a list of streams that represent this revlog

        This is used by stream-clone to do byte-for-byte copies of a repository.

        This streams data for all revisions that refer to a changelog revision up
        to `max_linkrev`.

        If `force_inline` is set, it enforces that the stream will represent an inline revlog.

        It returns a list of three-tuples:

          [
              (filename, bytes_stream, stream_size),
              …
          ]
        """
        n = len(self)
        index = self.index
        while n > 0:
            linkrev = index[n - 1][4]
            if linkrev < max_linkrev:
                break
            # note: this loop will rarely go through multiple iterations, since
            # it only traverses commits created during the current streaming
            # pull operation.
            #
            # If this becomes a problem, using a binary search should cap the
            # runtime of this.
            n = n - 1
        if n == 0:
            # no data to send
            return []
        index_size = n * index.entry_size
        data_size = self.end(n - 1)

        # XXX we might have been split (or stripped) since the object was
        # initialized. We need to close this race too; one way would be to
        # pre-open the files we feed to the revlog and never close them
        # before we are done streaming.

        if self._inline:

            def get_stream():
                with self.opener(self._indexfile, mode=b"r") as fp:
                    yield None
                    size = index_size + data_size
                    if size <= 65536:
                        yield fp.read(size)
                    else:
                        yield from util.filechunkiter(fp, limit=size)

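            # The generator is advanced past its first `yield None` right
            # below, so the file is opened (and missing-file errors surface)
            # immediately, while the data itself is still streamed lazily.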
            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        elif force_inline:

            def get_stream():
                with self.reading():
                    yield None

                    for rev in range(n):
                        idx = self.index.entry_binary(rev)
                        if rev == 0 and self._docket is None:
                            # re-inject the inline flag
                            header = self._format_flags
                            header |= self._format_version
                            header |= FLAG_INLINE_DATA
                            header = self.index.pack_header(header)
                            idx = header + idx
                        yield idx
                        yield self._inner.get_segment_for_revs(rev, rev)[1]

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        else:

            def get_index_stream():
                with self.opener(self._indexfile, mode=b"r") as fp:
                    yield None
                    if index_size <= 65536:
                        yield fp.read(index_size)
                    else:
                        yield from util.filechunkiter(fp, limit=index_size)

            def get_data_stream():
                with self._datafp() as fp:
                    yield None
                    if data_size <= 65536:
                        yield fp.read(data_size)
                    else:
                        yield from util.filechunkiter(fp, limit=data_size)

            index_stream = get_index_stream()
            next(index_stream)
            data_stream = get_data_stream()
            next(data_stream)
            return [
                (self._datafile, data_stream, data_size),
                (self._indexfile, index_stream, index_size),
            ]

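    # A minimal consumer sketch for get_streams() (the `write` helper is
    # hypothetical, not part of this module):
    #
    #     for name, stream, size in revlog.get_streams(max_linkrev):
    #         for chunk in stream:
    #             write(name, chunk)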
    def _loadindex(self, docket=None):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        elif self._try_split and self.opener.exists(self._split_index_file):
            entry_point = self._split_index_file
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF
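            # Worked example: an inline revlogv1 header is
            # REVLOGV1 | FLAG_INLINE_DATA == 0x00010001, which splits into
            # _format_version == 0x0001 and _format_flags == 0x00010000.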

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self.delta_config.general_delta = features[b'generaldelta'](
                self._format_flags
            )
            self.feature_config.has_side_data = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self.delta_config.general_delta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self.delta_config.general_delta:
            self.delta_config.sparse_revlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and hasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

        return chunkcache

    def _load_inner(self, chunk_cache):
        self._inner = _InnerRevlog(
            opener=self.opener,
            index=self.index,
            index_file=self._indexfile,
            data_file=self._datafile,
            sidedata_file=self._sidedatafile,
            inline=self._inline,
            data_config=self.data_config,
            chunk_cache=chunk_cache,
        )

    def get_revlog(self):
        """simple function to mirror the API of other not-really-revlog APIs"""
        return self

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public-facing "ID" of the revlog that we use in messages"""
        if self.revlog_kind == KIND_FILELOG:
            # Reference the file without the "data/" prefix, so it is familiar
            # to the user.
            return self.target[1]
        else:
            return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(
                    self.feature_config.compression_engine_options
                )
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self.feature_config.compression_engine]
        return engine.revlogcompressor(
            self.feature_config.compression_engine_options
        )

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def _candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents), making the delta incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        """update on-disk cache

        If a transaction is passed, the update may be delayed to transaction
        commit."""
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        """Clear in-memory caches"""
        self._revisioncache = None
        self._chainbasecache.clear()
        self._inner._segmentfile.clear_cache()
        self._inner._segmentfile_sidedata.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The Python code is the one responsible for validating the docket;
        # we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and hasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        """return the revision number associated with a <nodeid>"""
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
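    # For example, an entry whose data starts at byte 4096 with flags 0x1
    # stores (4096 << 16) | 0x1 in index[rev][0]; start() recovers 4096 via
    # `>> 16` and flags() recovers 0x1 via `& 0xFFFF`.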
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset (instead of previous-offset +
        # previous-size).
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.feature_config.has_side_data:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
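        # The mask below is the set of known flags with the ELLIPSIS bit
        # cleared (`^` removes it), so a revision carrying only the ELLIPSIS
        # flag still takes the rawsize() fast path.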
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

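        # e.g. with canonical_parent_order set, parents stored as
        # (nullrev, 5) are returned as (5, nullrev), so a null p1 never
        # precedes a real parent.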
        if self.feature_config.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if self.feature_config.canonical_parent_order and d[5] == self.nullid:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self.delta_config.general_delta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        generaldelta = self.delta_config.general_delta
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

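        # `has` behaves like a set that unions the lazily computed ancestors
        # of `common` with explicitly add()-ed revisions, so the membership
        # tests below avoid materializing all ancestors up front.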
        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is; real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
1861 isdescendant = True
1919 isdescendant = True
1862 elif n in descendants:
1920 elif n in descendants:
1863 # n is already a descendant
1921 # n is already a descendant
1864 isdescendant = True
1922 isdescendant = True
1865 # This check only needs to be done here because all the roots
1923 # This check only needs to be done here because all the roots
1866 # will start being marked as descendants before the loop.
1924 # will start being marked as descendants before the loop.
1867 if n in roots:
1925 if n in roots:
1868 # If n was a root, check if it's a 'real' root.
1926 # If n was a root, check if it's a 'real' root.
1869 p = tuple(self.parents(n))
1927 p = tuple(self.parents(n))
1870 # If any of its parents are descendants, it's not a root.
1928 # If any of its parents are descendants, it's not a root.
1871 if (p[0] in descendants) or (p[1] in descendants):
1929 if (p[0] in descendants) or (p[1] in descendants):
1872 roots.remove(n)
1930 roots.remove(n)
1873 else:
1931 else:
1874 p = tuple(self.parents(n))
1932 p = tuple(self.parents(n))
1875 # A node is a descendant if either of its parents are
1933 # A node is a descendant if either of its parents are
1876 # descendants. (We seeded the descendants set with the roots
1934 # descendants. (We seeded the descendants set with the roots
1877 # up there, remember?)
1935 # up there, remember?)
1878 if (p[0] in descendants) or (p[1] in descendants):
1936 if (p[0] in descendants) or (p[1] in descendants):
1879 descendants.add(n)
1937 descendants.add(n)
1880 isdescendant = True
1938 isdescendant = True
1881 if isdescendant and ((ancestors is None) or (n in ancestors)):
1939 if isdescendant and ((ancestors is None) or (n in ancestors)):
1882 # Only include nodes that are both descendants and ancestors.
1940 # Only include nodes that are both descendants and ancestors.
1883 orderedout.append(n)
1941 orderedout.append(n)
1884 if (ancestors is not None) and (n in heads):
1942 if (ancestors is not None) and (n in heads):
1885 # We're trying to figure out which heads are reachable
1943 # We're trying to figure out which heads are reachable
1886 # from roots.
1944 # from roots.
1887 # Mark this head as having been reached
1945 # Mark this head as having been reached
1888 heads[n] = True
1946 heads[n] = True
1889 elif ancestors is None:
1947 elif ancestors is None:
1890 # Otherwise, we're trying to discover the heads.
1948 # Otherwise, we're trying to discover the heads.
1891 # Assume this is a head because if it isn't, the next step
1949 # Assume this is a head because if it isn't, the next step
1892 # will eventually remove it.
1950 # will eventually remove it.
1893 heads[n] = True
1951 heads[n] = True
1894 # But, obviously its parents aren't.
1952 # But, obviously its parents aren't.
1895 for p in self.parents(n):
1953 for p in self.parents(n):
1896 heads.pop(p, None)
1954 heads.pop(p, None)
1897 heads = [head for head, flag in heads.items() if flag]
1955 heads = [head for head, flag in heads.items() if flag]
1898 roots = list(roots)
1956 roots = list(roots)
1899 assert orderedout
1957 assert orderedout
1900 assert roots
1958 assert roots
1901 assert heads
1959 assert heads
1902 return (orderedout, roots, heads)
1960 return (orderedout, roots, heads)
1903
1961
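# Illustration (a sketch, not part of this change): the "mark ancestors
# from the heads" pass above, reduced to a self-contained toy. The
# ``parents`` map, ``revnum`` table and node names are hypothetical; the
# real code walks revlog nodes and uses revision numbers as the cutoff.
def _mark_ancestors_sketch(heads, parents, revnum, lowestrev):
    ancestors = set()
    nodestotag = set(heads)
    while nodestotag:
        n = nodestotag.pop()
        if revnum[n] >= lowestrev and n not in ancestors:
            ancestors.add(n)  # reachable from a head, above the cutoff
            nodestotag.update(parents[n])
    return ancestors

# For a linear chain a -> b -> c (revs 0, 1, 2), marking from head 'c' with
# lowestrev=1 yields {'b', 'c'}: 'a' sits below the cutoff and is skipped.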
1904 def headrevs(self, revs=None):
1962 def headrevs(self, revs=None):
1905 if revs is None:
1963 if revs is None:
1906 try:
1964 try:
1907 return self.index.headrevs()
1965 return self.index.headrevs()
1908 except AttributeError:
1966 except AttributeError:
1909 return self._headrevs()
1967 return self._headrevs()
1910 if rustdagop is not None and self.index.rust_ext_compat:
1968 if rustdagop is not None and self.index.rust_ext_compat:
1911 return rustdagop.headrevs(self.index, revs)
1969 return rustdagop.headrevs(self.index, revs)
1912 return dagop.headrevs(revs, self._uncheckedparentrevs)
1970 return dagop.headrevs(revs, self._uncheckedparentrevs)
1913
1971
1914 def computephases(self, roots):
1972 def computephases(self, roots):
1915 return self.index.computephasesmapsets(roots)
1973 return self.index.computephasesmapsets(roots)
1916
1974
1917 def _headrevs(self):
1975 def _headrevs(self):
1918 count = len(self)
1976 count = len(self)
1919 if not count:
1977 if not count:
1920 return [nullrev]
1978 return [nullrev]
1921 # we won't iterate over filtered revs so nobody is a head at start
1979 # we won't iterate over filtered revs so nobody is a head at start
1922 ishead = [0] * (count + 1)
1980 ishead = [0] * (count + 1)
1923 index = self.index
1981 index = self.index
1924 for r in self:
1982 for r in self:
1925 ishead[r] = 1 # I may be a head
1983 ishead[r] = 1 # I may be a head
1926 e = index[r]
1984 e = index[r]
1927 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1985 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1928 return [r for r, val in enumerate(ishead) if val]
1986 return [r for r, val in enumerate(ishead) if val]
1929
1987
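# Illustration (a sketch, not part of this change): the head computation
# above, restated over a plain list of (p1, p2) parent-rev pairs. The input
# shape is hypothetical; the real loop reads index entries, where fields 5
# and 6 hold the parent revisions.
def _headrevs_sketch(parentrevs):
    ishead = [True] * len(parentrevs)
    for p1, p2 in parentrevs:
        if p1 >= 0:
            ishead[p1] = False  # a revision with a child is not a head
        if p2 >= 0:
            ishead[p2] = False
    return [r for r, flag in enumerate(ishead) if flag]

# _headrevs_sketch([(-1, -1), (0, -1), (0, -1)]) == [1, 2]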
1930 def heads(self, start=None, stop=None):
1988 def heads(self, start=None, stop=None):
1931 """return the list of all nodes that have no children
1989 """return the list of all nodes that have no children
1932
1990
1933 if start is specified, only heads that are descendants of
1991 if start is specified, only heads that are descendants of
1934 start will be returned
1992 start will be returned
1935 if stop is specified, it will consider all the revs from stop
1993 if stop is specified, it will consider all the revs from stop
1936 as if they had no children
1994 as if they had no children
1937 """
1995 """
1938 if start is None and stop is None:
1996 if start is None and stop is None:
1939 if not len(self):
1997 if not len(self):
1940 return [self.nullid]
1998 return [self.nullid]
1941 return [self.node(r) for r in self.headrevs()]
1999 return [self.node(r) for r in self.headrevs()]
1942
2000
1943 if start is None:
2001 if start is None:
1944 start = nullrev
2002 start = nullrev
1945 else:
2003 else:
1946 start = self.rev(start)
2004 start = self.rev(start)
1947
2005
1948 stoprevs = {self.rev(n) for n in stop or []}
2006 stoprevs = {self.rev(n) for n in stop or []}
1949
2007
1950 revs = dagop.headrevssubset(
2008 revs = dagop.headrevssubset(
1951 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2009 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1952 )
2010 )
1953
2011
1954 return [self.node(rev) for rev in revs]
2012 return [self.node(rev) for rev in revs]
1955
2013
1956 def children(self, node):
2014 def children(self, node):
1957 """find the children of a given node"""
2015 """find the children of a given node"""
1958 c = []
2016 c = []
1959 p = self.rev(node)
2017 p = self.rev(node)
1960 for r in self.revs(start=p + 1):
2018 for r in self.revs(start=p + 1):
1961 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2019 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1962 if prevs:
2020 if prevs:
1963 for pr in prevs:
2021 for pr in prevs:
1964 if pr == p:
2022 if pr == p:
1965 c.append(self.node(r))
2023 c.append(self.node(r))
1966 elif p == nullrev:
2024 elif p == nullrev:
1967 c.append(self.node(r))
2025 c.append(self.node(r))
1968 return c
2026 return c
1969
2027
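# Illustration (a sketch, not part of this change): ``children`` leans on
# the revlog invariant that a child always has a higher revision number
# than its parents, so a single forward scan from ``p + 1`` suffices. A
# standalone toy over a hypothetical parent-rev table:
def _children_sketch(parentrevs, p):
    return [r for r in range(p + 1, len(parentrevs)) if p in parentrevs[r]]

# _children_sketch([(-1, -1), (0, -1), (0, 1)], 0) == [1, 2]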
1970 def commonancestorsheads(self, a, b):
2028 def commonancestorsheads(self, a, b):
1971 """calculate all the heads of the common ancestors of nodes a and b"""
2029 """calculate all the heads of the common ancestors of nodes a and b"""
1972 a, b = self.rev(a), self.rev(b)
2030 a, b = self.rev(a), self.rev(b)
1973 ancs = self._commonancestorsheads(a, b)
2031 ancs = self._commonancestorsheads(a, b)
1974 return pycompat.maplist(self.node, ancs)
2032 return pycompat.maplist(self.node, ancs)
1975
2033
1976 def _commonancestorsheads(self, *revs):
2034 def _commonancestorsheads(self, *revs):
1977 """calculate all the heads of the common ancestors of revs"""
2035 """calculate all the heads of the common ancestors of revs"""
1978 try:
2036 try:
1979 ancs = self.index.commonancestorsheads(*revs)
2037 ancs = self.index.commonancestorsheads(*revs)
1980 except (AttributeError, OverflowError): # C implementation failed
2038 except (AttributeError, OverflowError): # C implementation failed
1981 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2039 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1982 return ancs
2040 return ancs
1983
2041
1984 def isancestor(self, a, b):
2042 def isancestor(self, a, b):
1985 """return True if node a is an ancestor of node b
2043 """return True if node a is an ancestor of node b
1986
2044
1987 A revision is considered an ancestor of itself."""
2045 A revision is considered an ancestor of itself."""
1988 a, b = self.rev(a), self.rev(b)
2046 a, b = self.rev(a), self.rev(b)
1989 return self.isancestorrev(a, b)
2047 return self.isancestorrev(a, b)
1990
2048
1991 def isancestorrev(self, a, b):
2049 def isancestorrev(self, a, b):
1992 """return True if revision a is an ancestor of revision b
2050 """return True if revision a is an ancestor of revision b
1993
2051
1994 A revision is considered an ancestor of itself.
2052 A revision is considered an ancestor of itself.
1995
2053
1996 The implementation of this is trivial but the use of
2054 The implementation of this is trivial but the use of
1997 reachableroots is not."""
2055 reachableroots is not."""
1998 if a == nullrev:
2056 if a == nullrev:
1999 return True
2057 return True
2000 elif a == b:
2058 elif a == b:
2001 return True
2059 return True
2002 elif a > b:
2060 elif a > b:
2003 return False
2061 return False
2004 return bool(self.reachableroots(a, [b], [a], includepath=False))
2062 return bool(self.reachableroots(a, [b], [a], includepath=False))
2005
2063
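# Illustration (a sketch, not part of this change): the fast paths above
# exploit topological ordering -- an ancestor can never have a higher
# revision number than its descendant, so ``a > b`` settles the question
# before any graph walk. ``rl`` stands for any object exposing
# ``reachableroots`` the way this revlog does.
def _isancestorrev_sketch(rl, a, b):
    if a == nullrev or a == b:
        return True
    if a > b:
        return False  # a descendant cannot precede its ancestor
    return bool(rl.reachableroots(a, [b], [a], includepath=False))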
2006 def reachableroots(self, minroot, heads, roots, includepath=False):
2064 def reachableroots(self, minroot, heads, roots, includepath=False):
2007 """return (heads(::(<roots> and <roots>::<heads>)))
2065 """return (heads(::(<roots> and <roots>::<heads>)))
2008
2066
2009 If includepath is True, return (<roots>::<heads>)."""
2067 If includepath is True, return (<roots>::<heads>)."""
2010 try:
2068 try:
2011 return self.index.reachableroots2(
2069 return self.index.reachableroots2(
2012 minroot, heads, roots, includepath
2070 minroot, heads, roots, includepath
2013 )
2071 )
2014 except AttributeError:
2072 except AttributeError:
2015 return dagop._reachablerootspure(
2073 return dagop._reachablerootspure(
2016 self.parentrevs, minroot, roots, heads, includepath
2074 self.parentrevs, minroot, roots, heads, includepath
2017 )
2075 )
2018
2076
2019 def ancestor(self, a, b):
2077 def ancestor(self, a, b):
2020 """calculate the "best" common ancestor of nodes a and b"""
2078 """calculate the "best" common ancestor of nodes a and b"""
2021
2079
2022 a, b = self.rev(a), self.rev(b)
2080 a, b = self.rev(a), self.rev(b)
2023 try:
2081 try:
2024 ancs = self.index.ancestors(a, b)
2082 ancs = self.index.ancestors(a, b)
2025 except (AttributeError, OverflowError):
2083 except (AttributeError, OverflowError):
2026 ancs = ancestor.ancestors(self.parentrevs, a, b)
2084 ancs = ancestor.ancestors(self.parentrevs, a, b)
2027 if ancs:
2085 if ancs:
2028 # choose a consistent winner when there's a tie
2086 # choose a consistent winner when there's a tie
2029 return min(map(self.node, ancs))
2087 return min(map(self.node, ancs))
2030 return self.nullid
2088 return self.nullid
2031
2089
2032 def _match(self, id):
2090 def _match(self, id):
2033 if isinstance(id, int):
2091 if isinstance(id, int):
2034 # rev
2092 # rev
2035 return self.node(id)
2093 return self.node(id)
2036 if len(id) == self.nodeconstants.nodelen:
2094 if len(id) == self.nodeconstants.nodelen:
2037 # possibly a binary node
2095 # possibly a binary node
2038 # odds of a binary node being all hex in ASCII are 1 in 10**25
2096 # odds of a binary node being all hex in ASCII are 1 in 10**25
2039 try:
2097 try:
2040 node = id
2098 node = id
2041 self.rev(node) # quick search the index
2099 self.rev(node) # quick search the index
2042 return node
2100 return node
2043 except error.LookupError:
2101 except error.LookupError:
2044 pass # may be partial hex id
2102 pass # may be partial hex id
2045 try:
2103 try:
2046 # str(rev)
2104 # str(rev)
2047 rev = int(id)
2105 rev = int(id)
2048 if b"%d" % rev != id:
2106 if b"%d" % rev != id:
2049 raise ValueError
2107 raise ValueError
2050 if rev < 0:
2108 if rev < 0:
2051 rev = len(self) + rev
2109 rev = len(self) + rev
2052 if rev < 0 or rev >= len(self):
2110 if rev < 0 or rev >= len(self):
2053 raise ValueError
2111 raise ValueError
2054 return self.node(rev)
2112 return self.node(rev)
2055 except (ValueError, OverflowError):
2113 except (ValueError, OverflowError):
2056 pass
2114 pass
2057 if len(id) == 2 * self.nodeconstants.nodelen:
2115 if len(id) == 2 * self.nodeconstants.nodelen:
2058 try:
2116 try:
2059 # a full hex nodeid?
2117 # a full hex nodeid?
2060 node = bin(id)
2118 node = bin(id)
2061 self.rev(node)
2119 self.rev(node)
2062 return node
2120 return node
2063 except (binascii.Error, error.LookupError):
2121 except (binascii.Error, error.LookupError):
2064 pass
2122 pass
2065
2123
2066 def _partialmatch(self, id):
2124 def _partialmatch(self, id):
2067 # we don't care about wdirfilenodeids as they should always be full hashes
2125 # we don't care about wdirfilenodeids as they should always be full hashes
2068 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2126 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2069 ambiguous = False
2127 ambiguous = False
2070 try:
2128 try:
2071 partial = self.index.partialmatch(id)
2129 partial = self.index.partialmatch(id)
2072 if partial and self.hasnode(partial):
2130 if partial and self.hasnode(partial):
2073 if maybewdir:
2131 if maybewdir:
2074 # single 'ff...' match in radix tree, ambiguous with wdir
2132 # single 'ff...' match in radix tree, ambiguous with wdir
2075 ambiguous = True
2133 ambiguous = True
2076 else:
2134 else:
2077 return partial
2135 return partial
2078 elif maybewdir:
2136 elif maybewdir:
2079 # no 'ff...' match in radix tree, wdir identified
2137 # no 'ff...' match in radix tree, wdir identified
2080 raise error.WdirUnsupported
2138 raise error.WdirUnsupported
2081 else:
2139 else:
2082 return None
2140 return None
2083 except error.RevlogError:
2141 except error.RevlogError:
2084 # parsers.c radix tree lookup gave multiple matches
2142 # parsers.c radix tree lookup gave multiple matches
2085 # fast path: for unfiltered changelog, radix tree is accurate
2143 # fast path: for unfiltered changelog, radix tree is accurate
2086 if not getattr(self, 'filteredrevs', None):
2144 if not getattr(self, 'filteredrevs', None):
2087 ambiguous = True
2145 ambiguous = True
2088 # fall through to slow path that filters hidden revisions
2146 # fall through to slow path that filters hidden revisions
2089 except (AttributeError, ValueError):
2147 except (AttributeError, ValueError):
2090 # we are pure python, or key is not hex
2148 # we are pure python, or key is not hex
2091 pass
2149 pass
2092 if ambiguous:
2150 if ambiguous:
2093 raise error.AmbiguousPrefixLookupError(
2151 raise error.AmbiguousPrefixLookupError(
2094 id, self.display_id, _(b'ambiguous identifier')
2152 id, self.display_id, _(b'ambiguous identifier')
2095 )
2153 )
2096
2154
2097 if id in self._pcache:
2155 if id in self._pcache:
2098 return self._pcache[id]
2156 return self._pcache[id]
2099
2157
2100 if len(id) <= 40:
2158 if len(id) <= 40:
2101 # hex(node)[:...]
2159 # hex(node)[:...]
2102 l = len(id) // 2 * 2 # grab an even number of digits
2160 l = len(id) // 2 * 2 # grab an even number of digits
2103 try:
2161 try:
2104 # we're dropping the last digit, so let's check that it's hex,
2162 # we're dropping the last digit, so let's check that it's hex,
2105 # to avoid the expensive computation below if it's not
2163 # to avoid the expensive computation below if it's not
2106 if len(id) % 2 > 0:
2164 if len(id) % 2 > 0:
2107 if not (id[-1] in hexdigits):
2165 if not (id[-1] in hexdigits):
2108 return None
2166 return None
2109 prefix = bin(id[:l])
2167 prefix = bin(id[:l])
2110 except binascii.Error:
2168 except binascii.Error:
2111 pass
2169 pass
2112 else:
2170 else:
2113 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2171 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2114 nl = [
2172 nl = [
2115 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2173 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2116 ]
2174 ]
2117 if self.nodeconstants.nullhex.startswith(id):
2175 if self.nodeconstants.nullhex.startswith(id):
2118 nl.append(self.nullid)
2176 nl.append(self.nullid)
2119 if len(nl) > 0:
2177 if len(nl) > 0:
2120 if len(nl) == 1 and not maybewdir:
2178 if len(nl) == 1 and not maybewdir:
2121 self._pcache[id] = nl[0]
2179 self._pcache[id] = nl[0]
2122 return nl[0]
2180 return nl[0]
2123 raise error.AmbiguousPrefixLookupError(
2181 raise error.AmbiguousPrefixLookupError(
2124 id, self.display_id, _(b'ambiguous identifier')
2182 id, self.display_id, _(b'ambiguous identifier')
2125 )
2183 )
2126 if maybewdir:
2184 if maybewdir:
2127 raise error.WdirUnsupported
2185 raise error.WdirUnsupported
2128 return None
2186 return None
2129
2187
2130 def lookup(self, id):
2188 def lookup(self, id):
2131 """locate a node based on:
2189 """locate a node based on:
2132 - revision number or str(revision number)
2190 - revision number or str(revision number)
2133 - nodeid or subset of hex nodeid
2191 - nodeid or subset of hex nodeid
2134 """
2192 """
2135 n = self._match(id)
2193 n = self._match(id)
2136 if n is not None:
2194 if n is not None:
2137 return n
2195 return n
2138 n = self._partialmatch(id)
2196 n = self._partialmatch(id)
2139 if n:
2197 if n:
2140 return n
2198 return n
2141
2199
2142 raise error.LookupError(id, self.display_id, _(b'no match found'))
2200 raise error.LookupError(id, self.display_id, _(b'no match found'))
2143
2201
2144 def shortest(self, node, minlength=1):
2202 def shortest(self, node, minlength=1):
2145 """Find the shortest unambiguous prefix that matches node."""
2203 """Find the shortest unambiguous prefix that matches node."""
2146
2204
2147 def isvalid(prefix):
2205 def isvalid(prefix):
2148 try:
2206 try:
2149 matchednode = self._partialmatch(prefix)
2207 matchednode = self._partialmatch(prefix)
2150 except error.AmbiguousPrefixLookupError:
2208 except error.AmbiguousPrefixLookupError:
2151 return False
2209 return False
2152 except error.WdirUnsupported:
2210 except error.WdirUnsupported:
2153 # single 'ff...' match
2211 # single 'ff...' match
2154 return True
2212 return True
2155 if matchednode is None:
2213 if matchednode is None:
2156 raise error.LookupError(node, self.display_id, _(b'no node'))
2214 raise error.LookupError(node, self.display_id, _(b'no node'))
2157 return True
2215 return True
2158
2216
2159 def maybewdir(prefix):
2217 def maybewdir(prefix):
2160 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2218 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2161
2219
2162 hexnode = hex(node)
2220 hexnode = hex(node)
2163
2221
2164 def disambiguate(hexnode, minlength):
2222 def disambiguate(hexnode, minlength):
2165 """Disambiguate against wdirid."""
2223 """Disambiguate against wdirid."""
2166 for length in range(minlength, len(hexnode) + 1):
2224 for length in range(minlength, len(hexnode) + 1):
2167 prefix = hexnode[:length]
2225 prefix = hexnode[:length]
2168 if not maybewdir(prefix):
2226 if not maybewdir(prefix):
2169 return prefix
2227 return prefix
2170
2228
2171 if not getattr(self, 'filteredrevs', None):
2229 if not getattr(self, 'filteredrevs', None):
2172 try:
2230 try:
2173 length = max(self.index.shortest(node), minlength)
2231 length = max(self.index.shortest(node), minlength)
2174 return disambiguate(hexnode, length)
2232 return disambiguate(hexnode, length)
2175 except error.RevlogError:
2233 except error.RevlogError:
2176 if node != self.nodeconstants.wdirid:
2234 if node != self.nodeconstants.wdirid:
2177 raise error.LookupError(
2235 raise error.LookupError(
2178 node, self.display_id, _(b'no node')
2236 node, self.display_id, _(b'no node')
2179 )
2237 )
2180 except AttributeError:
2238 except AttributeError:
2181 # Fall through to pure code
2239 # Fall through to pure code
2182 pass
2240 pass
2183
2241
2184 if node == self.nodeconstants.wdirid:
2242 if node == self.nodeconstants.wdirid:
2185 for length in range(minlength, len(hexnode) + 1):
2243 for length in range(minlength, len(hexnode) + 1):
2186 prefix = hexnode[:length]
2244 prefix = hexnode[:length]
2187 if isvalid(prefix):
2245 if isvalid(prefix):
2188 return prefix
2246 return prefix
2189
2247
2190 for length in range(minlength, len(hexnode) + 1):
2248 for length in range(minlength, len(hexnode) + 1):
2191 prefix = hexnode[:length]
2249 prefix = hexnode[:length]
2192 if isvalid(prefix):
2250 if isvalid(prefix):
2193 return disambiguate(hexnode, length)
2251 return disambiguate(hexnode, length)
2194
2252
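# Illustration (a sketch, not part of this change): the search above,
# restated over a plain list of hex ids (toy input). A prefix is accepted
# once it matches exactly one node and is not made only of 'f's, which
# would be ambiguous with the wdir pseudo-identifier.
def _shortest_sketch(hexnode, all_hexnodes, minlength=1):
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        matches = [h for h in all_hexnodes if h.startswith(prefix)]
        if matches == [hexnode] and not all(c == 'f' for c in prefix):
            return prefix
    return hexnode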
2195 def cmp(self, node, text):
2253 def cmp(self, node, text):
2196 """compare text with a given file revision
2254 """compare text with a given file revision
2197
2255
2198 returns True if text is different than what is stored.
2256 returns True if text is different than what is stored.
2199 """
2257 """
2200 p1, p2 = self.parents(node)
2258 p1, p2 = self.parents(node)
2201 return storageutil.hashrevisionsha1(text, p1, p2) != node
2259 return storageutil.hashrevisionsha1(text, p1, p2) != node
2202
2260
2203 def _chunk(self, rev):
2261 def _chunk(self, rev):
2204 """Obtain a single decompressed chunk for a revision.
2262 """Obtain a single decompressed chunk for a revision.
2205
2263
2206 Accepts an integer revision.
2264 Accepts an integer revision.
2209
2267
2210 Returns a str holding uncompressed data for the requested revision.
2268 Returns a str holding uncompressed data for the requested revision.
2211 """
2269 """
2212 compression_mode = self.index[rev][10]
2270 compression_mode = self.index[rev][10]
2213 data = self._inner.get_segment_for_revs(rev, rev)[1]
2271 data = self._inner.get_segment_for_revs(rev, rev)[1]
2214 if compression_mode == COMP_MODE_PLAIN:
2272 if compression_mode == COMP_MODE_PLAIN:
2215 return data
2273 return data
2216 elif compression_mode == COMP_MODE_DEFAULT:
2274 elif compression_mode == COMP_MODE_DEFAULT:
2217 return self._decompressor(data)
2275 return self._decompressor(data)
2218 elif compression_mode == COMP_MODE_INLINE:
2276 elif compression_mode == COMP_MODE_INLINE:
2219 return self.decompress(data)
2277 return self.decompress(data)
2220 else:
2278 else:
2221 msg = b'unknown compression mode %d'
2279 msg = b'unknown compression mode %d'
2222 msg %= compression_mode
2280 msg %= compression_mode
2223 raise error.RevlogError(msg)
2281 raise error.RevlogError(msg)
2224
2282
2225 def _chunks(self, revs, targetsize=None):
2283 def _chunks(self, revs, targetsize=None):
2226 """Obtain decompressed chunks for the specified revisions.
2284 """Obtain decompressed chunks for the specified revisions.
2227
2285
2228 Accepts an iterable of numeric revisions that are assumed to be in
2286 Accepts an iterable of numeric revisions that are assumed to be in
2229 ascending order.
2287 ascending order.
2232
2290
2233 This function is similar to calling ``self._chunk()`` multiple times,
2291 This function is similar to calling ``self._chunk()`` multiple times,
2234 but is faster.
2292 but is faster.
2235
2293
2236 Returns a list with decompressed data for each requested revision.
2294 Returns a list with decompressed data for each requested revision.
2237 """
2295 """
2238 if not revs:
2296 if not revs:
2239 return []
2297 return []
2240 start = self.start
2298 start = self.start
2241 length = self.length
2299 length = self.length
2242 inline = self._inline
2300 inline = self._inline
2243 iosize = self.index.entry_size
2301 iosize = self.index.entry_size
2244 buffer = util.buffer
2302 buffer = util.buffer
2245
2303
2246 l = []
2304 l = []
2247 ladd = l.append
2305 ladd = l.append
2248
2306
2249 if not self.data_config.with_sparse_read:
2307 if not self.data_config.with_sparse_read:
2250 slicedchunks = (revs,)
2308 slicedchunks = (revs,)
2251 else:
2309 else:
2252 slicedchunks = deltautil.slicechunk(
2310 slicedchunks = deltautil.slicechunk(
2253 self, revs, targetsize=targetsize
2311 self, revs, targetsize=targetsize
2254 )
2312 )
2255
2313
2256 for revschunk in slicedchunks:
2314 for revschunk in slicedchunks:
2257 firstrev = revschunk[0]
2315 firstrev = revschunk[0]
2258 # Skip trailing revisions with empty diff
2316 # Skip trailing revisions with empty diff
2259 for lastrev in revschunk[::-1]:
2317 for lastrev in revschunk[::-1]:
2260 if length(lastrev) != 0:
2318 if length(lastrev) != 0:
2261 break
2319 break
2262
2320
2263 try:
2321 try:
2264 offset, data = self._inner.get_segment_for_revs(
2322 offset, data = self._inner.get_segment_for_revs(
2265 firstrev,
2323 firstrev,
2266 lastrev,
2324 lastrev,
2267 )
2325 )
2268 except OverflowError:
2326 except OverflowError:
2269 # issue4215 - we can't cache a run of chunks greater than
2327 # issue4215 - we can't cache a run of chunks greater than
2270 # 2G on Windows
2328 # 2G on Windows
2271 return [self._chunk(rev) for rev in revschunk]
2329 return [self._chunk(rev) for rev in revschunk]
2272
2330
2273 decomp = self.decompress
2331 decomp = self.decompress
2274 # self._decompressor might be None, but will not be used in that case
2332 # self._decompressor might be None, but will not be used in that case
2275 def_decomp = self._decompressor
2333 def_decomp = self._decompressor
2276 for rev in revschunk:
2334 for rev in revschunk:
2277 chunkstart = start(rev)
2335 chunkstart = start(rev)
2278 if inline:
2336 if inline:
2279 chunkstart += (rev + 1) * iosize
2337 chunkstart += (rev + 1) * iosize
2280 chunklength = length(rev)
2338 chunklength = length(rev)
2281 comp_mode = self.index[rev][10]
2339 comp_mode = self.index[rev][10]
2282 c = buffer(data, chunkstart - offset, chunklength)
2340 c = buffer(data, chunkstart - offset, chunklength)
2283 if comp_mode == COMP_MODE_PLAIN:
2341 if comp_mode == COMP_MODE_PLAIN:
2284 ladd(c)
2342 ladd(c)
2285 elif comp_mode == COMP_MODE_INLINE:
2343 elif comp_mode == COMP_MODE_INLINE:
2286 ladd(decomp(c))
2344 ladd(decomp(c))
2287 elif comp_mode == COMP_MODE_DEFAULT:
2345 elif comp_mode == COMP_MODE_DEFAULT:
2288 ladd(def_decomp(c))
2346 ladd(def_decomp(c))
2289 else:
2347 else:
2290 msg = b'unknown compression mode %d'
2348 msg = b'unknown compression mode %d'
2291 msg %= comp_mode
2349 msg %= comp_mode
2292 raise error.RevlogError(msg)
2350 raise error.RevlogError(msg)
2293
2351
2294 return l
2352 return l
2295
2353
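# Illustration (a sketch, not part of this change): both ``_chunk`` and
# ``_chunks`` dispatch on the per-revision compression mode stored in index
# entry 10. ``decomp`` and ``def_decomp`` stand for the inline and
# revlog-wide decompressors used above.
def _decode_chunk_sketch(mode, data, decomp, def_decomp):
    if mode == COMP_MODE_PLAIN:  # stored verbatim
        return data
    if mode == COMP_MODE_INLINE:  # compression marker embedded in the data
        return decomp(data)
    if mode == COMP_MODE_DEFAULT:  # revlog-wide default compression engine
        return def_decomp(data)
    raise error.RevlogError(b'unknown compression mode %d' % mode)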
2296 def deltaparent(self, rev):
2354 def deltaparent(self, rev):
2297 """return deltaparent of the given revision"""
2355 """return deltaparent of the given revision"""
2298 base = self.index[rev][3]
2356 base = self.index[rev][3]
2299 if base == rev:
2357 if base == rev:
2300 return nullrev
2358 return nullrev
2301 elif self.delta_config.general_delta:
2359 elif self.delta_config.general_delta:
2302 return base
2360 return base
2303 else:
2361 else:
2304 return rev - 1
2362 return rev - 1
2305
2363
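# Illustration (a sketch, not part of this change): how the base field
# (index entry 3) is interpreted. With general delta the field names the
# delta parent directly; in the legacy layout deltas always chain against
# the previous revision; base == rev marks a full snapshot with no delta
# parent at all.
def _deltaparent_sketch(base, rev, general_delta):
    if base == rev:
        return nullrev  # full snapshot, nothing to delta against
    return base if general_delta else rev - 1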
2306 def issnapshot(self, rev):
2364 def issnapshot(self, rev):
2307 """tells whether rev is a snapshot"""
2365 """tells whether rev is a snapshot"""
2308 if not self.delta_config.sparse_revlog:
2366 if not self.delta_config.sparse_revlog:
2309 return self.deltaparent(rev) == nullrev
2367 return self.deltaparent(rev) == nullrev
2310 elif hasattr(self.index, 'issnapshot'):
2368 elif hasattr(self.index, 'issnapshot'):
2311 # directly assign the method to cache the testing and access
2369 # directly assign the method to cache the testing and access
2312 self.issnapshot = self.index.issnapshot
2370 self.issnapshot = self.index.issnapshot
2313 return self.issnapshot(rev)
2371 return self.issnapshot(rev)
2314 if rev == nullrev:
2372 if rev == nullrev:
2315 return True
2373 return True
2316 entry = self.index[rev]
2374 entry = self.index[rev]
2317 base = entry[3]
2375 base = entry[3]
2318 if base == rev:
2376 if base == rev:
2319 return True
2377 return True
2320 if base == nullrev:
2378 if base == nullrev:
2321 return True
2379 return True
2322 p1 = entry[5]
2380 p1 = entry[5]
2323 while self.length(p1) == 0:
2381 while self.length(p1) == 0:
2324 b = self.deltaparent(p1)
2382 b = self.deltaparent(p1)
2325 if b == p1:
2383 if b == p1:
2326 break
2384 break
2327 p1 = b
2385 p1 = b
2328 p2 = entry[6]
2386 p2 = entry[6]
2329 while self.length(p2) == 0:
2387 while self.length(p2) == 0:
2330 b = self.deltaparent(p2)
2388 b = self.deltaparent(p2)
2331 if b == p2:
2389 if b == p2:
2332 break
2390 break
2333 p2 = b
2391 p2 = b
2334 if base == p1 or base == p2:
2392 if base == p1 or base == p2:
2335 return False
2393 return False
2336 return self.issnapshot(base)
2394 return self.issnapshot(base)
2337
2395
2338 def snapshotdepth(self, rev):
2396 def snapshotdepth(self, rev):
2339 """number of snapshot in the chain before this one"""
2397 """number of snapshot in the chain before this one"""
2340 if not self.issnapshot(rev):
2398 if not self.issnapshot(rev):
2341 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2399 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2342 return len(self._deltachain(rev)[0]) - 1
2400 return len(self._deltachain(rev)[0]) - 1
2343
2401
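# Worked example (hypothetical chain, not part of this change): for a
# snapshot, every element of its delta chain is itself a snapshot, so the
# depth is the chain length minus one:
#
#   chain = [0, 4, 9]        # full snapshot 0, then snapshots at 4 and 9
#   depth = len(chain) - 1   # == 2 for rev 9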
2344 def revdiff(self, rev1, rev2):
2402 def revdiff(self, rev1, rev2):
2345 """return or calculate a delta between two revisions
2403 """return or calculate a delta between two revisions
2346
2404
2347 The delta calculated is in binary form and is intended to be written to
2405 The delta calculated is in binary form and is intended to be written to
2348 revlog data directly. So this function needs raw revision data.
2406 revlog data directly. So this function needs raw revision data.
2349 """
2407 """
2350 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2408 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2351 return bytes(self._chunk(rev2))
2409 return bytes(self._chunk(rev2))
2352
2410
2353 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2411 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2354
2412
2355 def revision(self, nodeorrev):
2413 def revision(self, nodeorrev):
2356 """return an uncompressed revision of a given node or revision
2414 """return an uncompressed revision of a given node or revision
2357 number.
2415 number.
2358 """
2416 """
2359 return self._revisiondata(nodeorrev)
2417 return self._revisiondata(nodeorrev)
2360
2418
2361 def sidedata(self, nodeorrev):
2419 def sidedata(self, nodeorrev):
2362 """a map of extra data related to the changeset but not part of the hash
2420 """a map of extra data related to the changeset but not part of the hash
2363
2421
2364 This function currently returns a dictionary. However, a more advanced
2422 This function currently returns a dictionary. However, a more advanced
2365 mapping object will likely be used in the future for more
2423 mapping object will likely be used in the future for more
2366 efficient/lazy code.
2424 efficient/lazy code.
2367 """
2425 """
2368 # deal with <nodeorrev> argument type
2426 # deal with <nodeorrev> argument type
2369 if isinstance(nodeorrev, int):
2427 if isinstance(nodeorrev, int):
2370 rev = nodeorrev
2428 rev = nodeorrev
2371 else:
2429 else:
2372 rev = self.rev(nodeorrev)
2430 rev = self.rev(nodeorrev)
2373 return self._sidedata(rev)
2431 return self._sidedata(rev)
2374
2432
2375 def _revisiondata(self, nodeorrev, raw=False):
2433 def _revisiondata(self, nodeorrev, raw=False):
2376 # deal with <nodeorrev> argument type
2434 # deal with <nodeorrev> argument type
2377 if isinstance(nodeorrev, int):
2435 if isinstance(nodeorrev, int):
2378 rev = nodeorrev
2436 rev = nodeorrev
2379 node = self.node(rev)
2437 node = self.node(rev)
2380 else:
2438 else:
2381 node = nodeorrev
2439 node = nodeorrev
2382 rev = None
2440 rev = None
2383
2441
2384 # fast path the special `nullid` rev
2442 # fast path the special `nullid` rev
2385 if node == self.nullid:
2443 if node == self.nullid:
2386 return b""
2444 return b""
2387
2445
2388 # ``rawtext`` is the text as stored inside the revlog. Might be the
2446 # ``rawtext`` is the text as stored inside the revlog. Might be the
2389 # revision or might need to be processed to retrieve the revision.
2447 # revision or might need to be processed to retrieve the revision.
2390 rev, rawtext, validated = self._rawtext(node, rev)
2448 rev, rawtext, validated = self._rawtext(node, rev)
2391
2449
2392 if raw and validated:
2450 if raw and validated:
2393 # if we don't want to process the raw text and that raw
2451 # if we don't want to process the raw text and that raw
2394 # text is cached, we can exit early.
2452 # text is cached, we can exit early.
2395 return rawtext
2453 return rawtext
2396 if rev is None:
2454 if rev is None:
2397 rev = self.rev(node)
2455 rev = self.rev(node)
2398 # the revlog's flag for this revision
2456 # the revlog's flag for this revision
2399 # (usually alter its state or content)
2457 # (usually alter its state or content)
2400 flags = self.flags(rev)
2458 flags = self.flags(rev)
2401
2459
2402 if validated and flags == REVIDX_DEFAULT_FLAGS:
2460 if validated and flags == REVIDX_DEFAULT_FLAGS:
2403 # no extra flags set, no flag processor runs, text = rawtext
2461 # no extra flags set, no flag processor runs, text = rawtext
2404 return rawtext
2462 return rawtext
2405
2463
2406 if raw:
2464 if raw:
2407 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2465 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2408 text = rawtext
2466 text = rawtext
2409 else:
2467 else:
2410 r = flagutil.processflagsread(self, rawtext, flags)
2468 r = flagutil.processflagsread(self, rawtext, flags)
2411 text, validatehash = r
2469 text, validatehash = r
2412 if validatehash:
2470 if validatehash:
2413 self.checkhash(text, node, rev=rev)
2471 self.checkhash(text, node, rev=rev)
2414 if not validated:
2472 if not validated:
2415 self._revisioncache = (node, rev, rawtext)
2473 self._revisioncache = (node, rev, rawtext)
2416
2474
2417 return text
2475 return text
2418
2476
2419 def _rawtext(self, node, rev):
2477 def _rawtext(self, node, rev):
2420 """return the possibly unvalidated rawtext for a revision
2478 """return the possibly unvalidated rawtext for a revision
2421
2479
2422 returns (rev, rawtext, validated)
2480 returns (rev, rawtext, validated)
2423 """
2481 """
2424
2482
2425 # revision in the cache (could be useful to apply delta)
2483 # revision in the cache (could be useful to apply delta)
2426 cachedrev = None
2484 cachedrev = None
2427 # An intermediate text to apply deltas to
2485 # An intermediate text to apply deltas to
2428 basetext = None
2486 basetext = None
2429
2487
2430 # Check if we have the entry in cache
2488 # Check if we have the entry in cache
2431 # The cache entry looks like (node, rev, rawtext)
2489 # The cache entry looks like (node, rev, rawtext)
2432 if self._revisioncache:
2490 if self._revisioncache:
2433 if self._revisioncache[0] == node:
2491 if self._revisioncache[0] == node:
2434 return (rev, self._revisioncache[2], True)
2492 return (rev, self._revisioncache[2], True)
2435 cachedrev = self._revisioncache[1]
2493 cachedrev = self._revisioncache[1]
2436
2494
2437 if rev is None:
2495 if rev is None:
2438 rev = self.rev(node)
2496 rev = self.rev(node)
2439
2497
2440 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2498 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2441 if stopped:
2499 if stopped:
2442 basetext = self._revisioncache[2]
2500 basetext = self._revisioncache[2]
2443
2501
2444 # drop cache to save memory, the caller is expected to
2502 # drop cache to save memory, the caller is expected to
2445 # update self._revisioncache after validating the text
2503 # update self._revisioncache after validating the text
2446 self._revisioncache = None
2504 self._revisioncache = None
2447
2505
2448 targetsize = None
2506 targetsize = None
2449 rawsize = self.index[rev][2]
2507 rawsize = self.index[rev][2]
2450 if 0 <= rawsize:
2508 if 0 <= rawsize:
2451 targetsize = 4 * rawsize
2509 targetsize = 4 * rawsize
2452
2510
2453 bins = self._chunks(chain, targetsize=targetsize)
2511 bins = self._chunks(chain, targetsize=targetsize)
2454 if basetext is None:
2512 if basetext is None:
2455 basetext = bytes(bins[0])
2513 basetext = bytes(bins[0])
2456 bins = bins[1:]
2514 bins = bins[1:]
2457
2515
2458 rawtext = mdiff.patches(basetext, bins)
2516 rawtext = mdiff.patches(basetext, bins)
2459 del basetext # let us have a chance to free memory early
2517 del basetext # let us have a chance to free memory early
2460 return (rev, rawtext, False)
2518 return (rev, rawtext, False)
2461
2519
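# Illustration (a sketch, not part of this change): full-text
# reconstruction folds the stored deltas over the base text, exactly as the
# tail of ``_rawtext`` does. ``chunks`` is assumed to hold the decompressed
# chain segments with the full base text first:
def _reconstruct_sketch(chunks):
    basetext = bytes(chunks[0])
    return mdiff.patches(basetext, chunks[1:])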
2462 def _sidedata(self, rev):
2520 def _sidedata(self, rev):
2463 """Return the sidedata for a given revision number."""
2521 """Return the sidedata for a given revision number."""
2464 index_entry = self.index[rev]
2522 index_entry = self.index[rev]
2465 sidedata_offset = index_entry[8]
2523 sidedata_offset = index_entry[8]
2466 sidedata_size = index_entry[9]
2524 sidedata_size = index_entry[9]
2467
2525
2468 if self._inline:
2526 if self._inline:
2469 sidedata_offset += self.index.entry_size * (1 + rev)
2527 sidedata_offset += self.index.entry_size * (1 + rev)
2470 if sidedata_size == 0:
2528 if sidedata_size == 0:
2471 return {}
2529 return {}
2472
2530
2473 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2531 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2474 filename = self._sidedatafile
2532 filename = self._sidedatafile
2475 end = self._docket.sidedata_end
2533 end = self._docket.sidedata_end
2476 offset = sidedata_offset
2534 offset = sidedata_offset
2477 length = sidedata_size
2535 length = sidedata_size
2478 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2536 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2479 raise error.RevlogError(m)
2537 raise error.RevlogError(m)
2480
2538
2481 comp_segment = self._inner._segmentfile_sidedata.read_chunk(
2539 comp_segment = self._inner._segmentfile_sidedata.read_chunk(
2482 sidedata_offset, sidedata_size
2540 sidedata_offset, sidedata_size
2483 )
2541 )
2484
2542
2485 comp = self.index[rev][11]
2543 comp = self.index[rev][11]
2486 if comp == COMP_MODE_PLAIN:
2544 if comp == COMP_MODE_PLAIN:
2487 segment = comp_segment
2545 segment = comp_segment
2488 elif comp == COMP_MODE_DEFAULT:
2546 elif comp == COMP_MODE_DEFAULT:
2489 segment = self._decompressor(comp_segment)
2547 segment = self._decompressor(comp_segment)
2490 elif comp == COMP_MODE_INLINE:
2548 elif comp == COMP_MODE_INLINE:
2491 segment = self.decompress(comp_segment)
2549 segment = self.decompress(comp_segment)
2492 else:
2550 else:
2493 msg = b'unknown compression mode %d'
2551 msg = b'unknown compression mode %d'
2494 msg %= comp
2552 msg %= comp
2495 raise error.RevlogError(msg)
2553 raise error.RevlogError(msg)
2496
2554
2497 sidedata = sidedatautil.deserialize_sidedata(segment)
2555 sidedata = sidedatautil.deserialize_sidedata(segment)
2498 return sidedata
2556 return sidedata
2499
2557
2500 def rawdata(self, nodeorrev):
2558 def rawdata(self, nodeorrev):
2501 """return an uncompressed raw data of a given node or revision number."""
2559 """return an uncompressed raw data of a given node or revision number."""
2502 return self._revisiondata(nodeorrev, raw=True)
2560 return self._revisiondata(nodeorrev, raw=True)
2503
2561
2504 def hash(self, text, p1, p2):
2562 def hash(self, text, p1, p2):
2505 """Compute a node hash.
2563 """Compute a node hash.
2506
2564
2507 Available as a function so that subclasses can replace the hash
2565 Available as a function so that subclasses can replace the hash
2508 as needed.
2566 as needed.
2509 """
2567 """
2510 return storageutil.hashrevisionsha1(text, p1, p2)
2568 return storageutil.hashrevisionsha1(text, p1, p2)
2511
2569
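# Illustration (a sketch, not part of this change): a standalone equivalent
# of the classic revlog hash computed by ``storageutil.hashrevisionsha1``
# -- sha1 over the two parent nodes in sorted order, then the text:
import hashlib

def _hashrevision_sketch(text, p1, p2):
    a, b = sorted((p1, p2))
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()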
2512 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2570 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2513 """Check node hash integrity.
2571 """Check node hash integrity.
2514
2572
2515 Available as a function so that subclasses can extend hash mismatch
2573 Available as a function so that subclasses can extend hash mismatch
2516 behaviors as needed.
2574 behaviors as needed.
2517 """
2575 """
2518 try:
2576 try:
2519 if p1 is None and p2 is None:
2577 if p1 is None and p2 is None:
2520 p1, p2 = self.parents(node)
2578 p1, p2 = self.parents(node)
2521 if node != self.hash(text, p1, p2):
2579 if node != self.hash(text, p1, p2):
2522 # Clear the revision cache on hash failure. The revision cache
2580 # Clear the revision cache on hash failure. The revision cache
2523 # only stores the raw revision and clearing the cache does have
2581 # only stores the raw revision and clearing the cache does have
2524 # the side-effect that we won't have a cache hit when the raw
2582 # the side-effect that we won't have a cache hit when the raw
2525 # revision data is accessed. But this case should be rare and
2583 # revision data is accessed. But this case should be rare and
2526 # it is extra work to teach the cache about the hash
2584 # it is extra work to teach the cache about the hash
2527 # verification state.
2585 # verification state.
2528 if self._revisioncache and self._revisioncache[0] == node:
2586 if self._revisioncache and self._revisioncache[0] == node:
2529 self._revisioncache = None
2587 self._revisioncache = None
2530
2588
2531 revornode = rev
2589 revornode = rev
2532 if revornode is None:
2590 if revornode is None:
2533 revornode = templatefilters.short(hex(node))
2591 revornode = templatefilters.short(hex(node))
2534 raise error.RevlogError(
2592 raise error.RevlogError(
2535 _(b"integrity check failed on %s:%s")
2593 _(b"integrity check failed on %s:%s")
2536 % (self.display_id, pycompat.bytestr(revornode))
2594 % (self.display_id, pycompat.bytestr(revornode))
2537 )
2595 )
2538 except error.RevlogError:
2596 except error.RevlogError:
2539 if self.feature_config.censorable and storageutil.iscensoredtext(
2597 if self.feature_config.censorable and storageutil.iscensoredtext(
2540 text
2598 text
2541 ):
2599 ):
2542 raise error.CensoredNodeError(self.display_id, node, text)
2600 raise error.CensoredNodeError(self.display_id, node, text)
2543 raise
2601 raise
2544
2602
2545 @property
2603 @property
2546 def _split_index_file(self):
2604 def _split_index_file(self):
2547 """the path where to expect the index of an ongoing splitting operation
2605 """the path where to expect the index of an ongoing splitting operation
2548
2606
2549 The file will only exist if a splitting operation is in progress, but
2607 The file will only exist if a splitting operation is in progress, but
2550 it is always expected at the same location."""
2608 it is always expected at the same location."""
2551 parts = self.radix.split(b'/')
2609 parts = self.radix.split(b'/')
2552 if len(parts) > 1:
2610 if len(parts) > 1:
2553 # add a '-s' suffix to the ``data/`` or ``meta/`` directory name
2611 # add a '-s' suffix to the ``data/`` or ``meta/`` directory name
2554 head = parts[0] + b'-s'
2612 head = parts[0] + b'-s'
2555 mids = parts[1:-1]
2613 mids = parts[1:-1]
2556 tail = parts[-1] + b'.i'
2614 tail = parts[-1] + b'.i'
2557 pieces = [head] + mids + [tail]
2615 pieces = [head] + mids + [tail]
2558 return b'/'.join(pieces)
2616 return b'/'.join(pieces)
2559 else:
2617 else:
2560 # the revlog is stored at the root of the store (changelog or
2618 # the revlog is stored at the root of the store (changelog or
2561 # manifest), no risk of collision.
2619 # manifest), no risk of collision.
2562 return self.radix + b'.i.s'
2620 return self.radix + b'.i.s'
2563
2621
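# Worked examples of the path computation above (hypothetical radixes, not
# part of this change):
#
#   radix b'data/foo/bar'  ->  b'data-s/foo/bar.i'  (store subdirectory)
#   radix b'00changelog'   ->  b'00changelog.i.s'   (revlog at store root)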
2564 def _enforceinlinesize(self, tr, side_write=True):
2622 def _enforceinlinesize(self, tr, side_write=True):
2565 """Check if the revlog is too big for inline and convert if so.
2623 """Check if the revlog is too big for inline and convert if so.
2566
2624
2567 This should be called after revisions are added to the revlog. If the
2625 This should be called after revisions are added to the revlog. If the
2568 revlog has grown too large to be an inline revlog, it will convert it
2626 revlog has grown too large to be an inline revlog, it will convert it
2569 to use multiple index and data files.
2627 to use multiple index and data files.
2570 """
2628 """
2571 tiprev = len(self) - 1
2629 tiprev = len(self) - 1
2572 total_size = self.start(tiprev) + self.length(tiprev)
2630 total_size = self.start(tiprev) + self.length(tiprev)
2573 if not self._inline or total_size < _maxinline:
2631 if not self._inline or total_size < _maxinline:
2574 return
2632 return
2575
2633
2576 if self._docket is not None:
2634 if self._docket is not None:
2577 msg = b"inline revlog should not have a docket"
2635 msg = b"inline revlog should not have a docket"
2578 raise error.ProgrammingError(msg)
2636 raise error.ProgrammingError(msg)
2579
2637
2580 troffset = tr.findoffset(self._indexfile)
2638 troffset = tr.findoffset(self._indexfile)
2581 if troffset is None:
2639 if troffset is None:
2582 raise error.RevlogError(
2640 raise error.RevlogError(
2583 _(b"%s not found in the transaction") % self._indexfile
2641 _(b"%s not found in the transaction") % self._indexfile
2584 )
2642 )
2585 if troffset:
2643 if troffset:
2586 tr.addbackup(self._indexfile, for_offset=True)
2644 tr.addbackup(self._indexfile, for_offset=True)
2587 tr.add(self._datafile, 0)
2645 tr.add(self._datafile, 0)
2588
2646
2589 existing_handles = False
2647 new_index_file_path = None
2590 if self._inner._writinghandles is not None:
2591 existing_handles = True
2592 fp = self._inner._writinghandles[0]
2593 fp.flush()
2594 fp.close()
2595 # We can't use the cached file handle after close(). So prevent
2596 # its usage.
2597 self._inner._writinghandles = None
2598 self._inner._segmentfile.writing_handle = None
2599 # No need to deal with sidedata writing handle as it is only
2600 # relevant with revlog-v2 which is never inline, not reaching
2601 # this code
2602 if side_write:
2648 if side_write:
2603 old_index_file_path = self._indexfile
2649 old_index_file_path = self._indexfile
2604 new_index_file_path = self._split_index_file
2650 new_index_file_path = self._split_index_file
2605 opener = self.opener
2651 opener = self.opener
2606 weak_self = weakref.ref(self)
2652 weak_self = weakref.ref(self)
2607
2653
2608 # the "split" index replace the real index when the transaction is finalized
2654 # the "split" index replace the real index when the transaction is
2655 # finalized
2609 def finalize_callback(tr):
2656 def finalize_callback(tr):
2610 opener.rename(
2657 opener.rename(
2611 new_index_file_path,
2658 new_index_file_path,
2612 old_index_file_path,
2659 old_index_file_path,
2613 checkambig=True,
2660 checkambig=True,
2614 )
2661 )
2615 maybe_self = weak_self()
2662 maybe_self = weak_self()
2616 if maybe_self is not None:
2663 if maybe_self is not None:
2617 maybe_self._indexfile = old_index_file_path
2664 maybe_self._indexfile = old_index_file_path
2618 maybe_self._inner.index_file = maybe_self._indexfile
2665 maybe_self._inner.index_file = maybe_self._indexfile
2619
2666
2620 def abort_callback(tr):
2667 def abort_callback(tr):
2621 maybe_self = weak_self()
2668 maybe_self = weak_self()
2622 if maybe_self is not None:
2669 if maybe_self is not None:
2623 maybe_self._indexfile = old_index_file_path
2670 maybe_self._indexfile = old_index_file_path
2671 maybe_self._inner.inline = True
2624 maybe_self._inner.index_file = old_index_file_path
2672 maybe_self._inner.index_file = old_index_file_path
2625
2673
2626 tr.registertmp(new_index_file_path)
2674 tr.registertmp(new_index_file_path)
2627 if self.target[1] is not None:
2675 if self.target[1] is not None:
2628 callback_id = b'000-revlog-split-%d-%s' % self.target
2676 callback_id = b'000-revlog-split-%d-%s' % self.target
2629 else:
2677 else:
2630 callback_id = b'000-revlog-split-%d' % self.target[0]
2678 callback_id = b'000-revlog-split-%d' % self.target[0]
2631 tr.addfinalize(callback_id, finalize_callback)
2679 tr.addfinalize(callback_id, finalize_callback)
2632 tr.addabort(callback_id, abort_callback)
2680 tr.addabort(callback_id, abort_callback)
2633
2681
2634 new_dfh = self._datafp(b'w+')
2682 self._format_flags &= ~FLAG_INLINE_DATA
2635 new_dfh.truncate(0) # drop any potentially existing data
2683 self._inner.split_inline(
2636 try:
2684 tr,
2637 with self.reading():
2685 self._format_flags | self._format_version,
2638 for r in self:
2686 new_index_file_path=new_index_file_path,
2639 new_dfh.write(self._inner.get_segment_for_revs(r, r)[1])
2687 )
2640 new_dfh.flush()
2688
2641
2689 self._inline = False
2642 if side_write:
2690 if new_index_file_path is not None:
2643 self._indexfile = new_index_file_path
2691 self._indexfile = new_index_file_path
2644 self._inner.index_file = self._indexfile
2692
2645 with self._inner._InnerRevlog__index_new_fp() as fp:
2693 nodemaputil.setup_persistent_nodemap(tr, self)
2646 self._format_flags &= ~FLAG_INLINE_DATA
2647 self._inline = False
2648 self._inner.inline = False
2649 for i in self:
2650 e = self.index.entry_binary(i)
2651 if i == 0:
2652 header = self._format_flags | self._format_version
2653 header = self.index.pack_header(header)
2654 e = header + e
2655 fp.write(e)
2656
2657 # If we don't use side-write, the temp file replace the real
2658 # index when we exit the context manager
2659
2660 nodemaputil.setup_persistent_nodemap(tr, self)
2661 self._inner._segmentfile = randomaccessfile.randomaccessfile(
2662 self.opener,
2663 self._datafile,
2664 self.data_config.chunk_cache_size,
2665 )
2666
2667 if existing_handles:
2668 # switched from inline to conventional; reopen the index
2669 index_end = None
2670 ifh = self._inner._InnerRevlog__index_write_fp(
2671 index_end=index_end
2672 )
2673 self._inner._writinghandles = (ifh, new_dfh, None)
2674 self._inner._segmentfile.writing_handle = new_dfh
2675 new_dfh = None
2676 # No need to deal with sidedata writing handle as it is only
2677 # relevant with revlog-v2 which is never inline, not reaching
2678 # this code
2679 finally:
2680 if new_dfh is not None:
2681 new_dfh.close()
2682
2694
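# Illustration (a sketch under assumed callback signatures, not part of
# this change): the shape of the conversion that ``split_inline`` now
# performs inside the inner object -- copy every revision's data segment
# into the standalone data file, then rewrite the index without the inline
# flag. With ``side_write`` the new index goes to the temporary ``.i.s``
# path that the transaction renames into place on finalize or rolls back on
# abort.
def _split_inline_sketch(revs, read_segment, write_data, write_index_entry):
    for r in revs:
        write_data(read_segment(r))  # data moves to the standalone file
    for r in revs:
        write_index_entry(r)  # index entries rewritten without inline data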
2683 def _nodeduplicatecallback(self, transaction, node):
2695 def _nodeduplicatecallback(self, transaction, node):
2684 """called when trying to add a node already stored."""
2696 """called when trying to add a node already stored."""
2685
2697
2686 @contextlib.contextmanager
2698 @contextlib.contextmanager
2687 def reading(self):
2699 def reading(self):
2688 with self._inner.reading():
2700 with self._inner.reading():
2689 yield
2701 yield
2690
2702
2691 @contextlib.contextmanager
2703 @contextlib.contextmanager
2692 def _writing(self, transaction):
2704 def _writing(self, transaction):
2693 if self._trypending:
2705 if self._trypending:
2694 msg = b'try to write in a `trypending` revlog: %s'
2706 msg = b'try to write in a `trypending` revlog: %s'
2695 msg %= self.display_id
2707 msg %= self.display_id
2696 raise error.ProgrammingError(msg)
2708 raise error.ProgrammingError(msg)
2697 if self._inner.is_writing:
2709 if self._inner.is_writing:
2698 yield
2710 yield
2699 else:
2711 else:
2700 data_end = None
2712 data_end = None
2701 sidedata_end = None
2713 sidedata_end = None
2702 if self._docket is not None:
2714 if self._docket is not None:
2703 data_end = self._docket.data_end
2715 data_end = self._docket.data_end
2704 sidedata_end = self._docket.sidedata_end
2716 sidedata_end = self._docket.sidedata_end
2705 with self._inner.writing(
2717 with self._inner.writing(
2706 transaction,
2718 transaction,
2707 data_end=data_end,
2719 data_end=data_end,
2708 sidedata_end=sidedata_end,
2720 sidedata_end=sidedata_end,
2709 ):
2721 ):
2710 yield
2722 yield
2711 if self._docket is not None:
2723 if self._docket is not None:
2712 self._write_docket(transaction)
2724 self._write_docket(transaction)
2713
2725
2714 def _write_docket(self, transaction):
2726 def _write_docket(self, transaction):
2715 """write the current docket on disk
2727 """write the current docket on disk
2716
2728
2717 Exists as a method to help the changelog implement transaction logic
2729 Exists as a method to help the changelog implement transaction logic
2718
2730
2719 We could also imagine using the same transaction logic for all revlogs
2731 We could also imagine using the same transaction logic for all revlogs
2720 since dockets are cheap."""
2732 since dockets are cheap."""
2721 self._docket.write(transaction)
2733 self._docket.write(transaction)
2722
2734
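# Illustrative sketch (not part of revlog.py) of the re-entrancy pattern
# `_writing` above relies on: when a write context is already open,
# nested callers simply yield and reuse the open handles instead of
# opening the files again. A minimal, hypothetical version:
import contextlib

class _ReentrantWriter:
    def __init__(self):
        self.is_writing = False

    @contextlib.contextmanager
    def writing(self):
        if self.is_writing:
            yield  # already inside a write context: reuse it
        else:
            self.is_writing = True
            try:
                yield
            finally:
                self.is_writing = False

w = _ReentrantWriter()
with w.writing():
    with w.writing():  # nested entry is a no-op; state is shared
        assert w.is_writing
assert not w.is_writing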
2723 def addrevision(
2735 def addrevision(
2724 self,
2736 self,
2725 text,
2737 text,
2726 transaction,
2738 transaction,
2727 link,
2739 link,
2728 p1,
2740 p1,
2729 p2,
2741 p2,
2730 cachedelta=None,
2742 cachedelta=None,
2731 node=None,
2743 node=None,
2732 flags=REVIDX_DEFAULT_FLAGS,
2744 flags=REVIDX_DEFAULT_FLAGS,
2733 deltacomputer=None,
2745 deltacomputer=None,
2734 sidedata=None,
2746 sidedata=None,
2735 ):
2747 ):
2736 """add a revision to the log
2748 """add a revision to the log
2737
2749
2738 text - the revision data to add
2750 text - the revision data to add
2739 transaction - the transaction object used for rollback
2751 transaction - the transaction object used for rollback
2740 link - the linkrev data to add
2752 link - the linkrev data to add
2741 p1, p2 - the parent nodeids of the revision
2753 p1, p2 - the parent nodeids of the revision
2742 cachedelta - an optional precomputed delta
2754 cachedelta - an optional precomputed delta
2743 node - nodeid of revision; typically node is not specified, and it is
2755 node - nodeid of revision; typically node is not specified, and it is
2744 computed by default as hash(text, p1, p2), however subclasses might
2756 computed by default as hash(text, p1, p2), however subclasses might
2745 use a different hashing method (and override checkhash() in such a case)
2757 use a different hashing method (and override checkhash() in such a case)
2746 flags - the known flags to set on the revision
2758 flags - the known flags to set on the revision
2747 deltacomputer - an optional deltacomputer instance shared between
2759 deltacomputer - an optional deltacomputer instance shared between
2748 multiple calls
2760 multiple calls
2749 """
2761 """
2750 if link == nullrev:
2762 if link == nullrev:
2751 raise error.RevlogError(
2763 raise error.RevlogError(
2752 _(b"attempted to add linkrev -1 to %s") % self.display_id
2764 _(b"attempted to add linkrev -1 to %s") % self.display_id
2753 )
2765 )
2754
2766
2755 if sidedata is None:
2767 if sidedata is None:
2756 sidedata = {}
2768 sidedata = {}
2757 elif sidedata and not self.feature_config.has_side_data:
2769 elif sidedata and not self.feature_config.has_side_data:
2758 raise error.ProgrammingError(
2770 raise error.ProgrammingError(
2759 _(b"trying to add sidedata to a revlog who don't support them")
2771 _(b"trying to add sidedata to a revlog who don't support them")
2760 )
2772 )
2761
2773
2762 if flags:
2774 if flags:
2763 node = node or self.hash(text, p1, p2)
2775 node = node or self.hash(text, p1, p2)
2764
2776
2765 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2777 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2766
2778
2767 # If the flag processor modifies the revision data, ignore any provided
2779 # If the flag processor modifies the revision data, ignore any provided
2768 # cachedelta.
2780 # cachedelta.
2769 if rawtext != text:
2781 if rawtext != text:
2770 cachedelta = None
2782 cachedelta = None
2771
2783
2772 if len(rawtext) > _maxentrysize:
2784 if len(rawtext) > _maxentrysize:
2773 raise error.RevlogError(
2785 raise error.RevlogError(
2774 _(
2786 _(
2775 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2787 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2776 )
2788 )
2777 % (self.display_id, len(rawtext))
2789 % (self.display_id, len(rawtext))
2778 )
2790 )
2779
2791
2780 node = node or self.hash(rawtext, p1, p2)
2792 node = node or self.hash(rawtext, p1, p2)
2781 rev = self.index.get_rev(node)
2793 rev = self.index.get_rev(node)
2782 if rev is not None:
2794 if rev is not None:
2783 return rev
2795 return rev
2784
2796
2785 if validatehash:
2797 if validatehash:
2786 self.checkhash(rawtext, node, p1=p1, p2=p2)
2798 self.checkhash(rawtext, node, p1=p1, p2=p2)
2787
2799
2788 return self.addrawrevision(
2800 return self.addrawrevision(
2789 rawtext,
2801 rawtext,
2790 transaction,
2802 transaction,
2791 link,
2803 link,
2792 p1,
2804 p1,
2793 p2,
2805 p2,
2794 node,
2806 node,
2795 flags,
2807 flags,
2796 cachedelta=cachedelta,
2808 cachedelta=cachedelta,
2797 deltacomputer=deltacomputer,
2809 deltacomputer=deltacomputer,
2798 sidedata=sidedata,
2810 sidedata=sidedata,
2799 )
2811 )
2800
2812
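# Illustrative sketch (hypothetical stand-alone code, not the revlog API)
# of the content addressing that lets `addrevision` deduplicate: for
# sha1 revlogs the node id is the hash of the two parent nodes, sorted,
# followed by the revision text, so re-adding identical content with
# identical parents produces the same node and hits the existing-rev
# lookup above.
import hashlib

def example_node_id(text, p1, p2):
    a, b = sorted((p1, p2))
    h = hashlib.sha1(a + b)
    h.update(text)
    return h.digest()

nullid = b'\0' * 20
first = example_node_id(b'hello\n', nullid, nullid)
second = example_node_id(b'hello\n', nullid, nullid)
assert first == second  # same text and parents -> same node -> same rev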
2801 def addrawrevision(
2813 def addrawrevision(
2802 self,
2814 self,
2803 rawtext,
2815 rawtext,
2804 transaction,
2816 transaction,
2805 link,
2817 link,
2806 p1,
2818 p1,
2807 p2,
2819 p2,
2808 node,
2820 node,
2809 flags,
2821 flags,
2810 cachedelta=None,
2822 cachedelta=None,
2811 deltacomputer=None,
2823 deltacomputer=None,
2812 sidedata=None,
2824 sidedata=None,
2813 ):
2825 ):
2814 """add a raw revision with known flags, node and parents
2826 """add a raw revision with known flags, node and parents
2815 useful when reusing a revision not stored in this revlog (ex: received
2827 useful when reusing a revision not stored in this revlog (ex: received
2816 over the wire, or read from an external bundle).
2828 over the wire, or read from an external bundle).
2817 """
2829 """
2818 with self._writing(transaction):
2830 with self._writing(transaction):
2819 return self._addrevision(
2831 return self._addrevision(
2820 node,
2832 node,
2821 rawtext,
2833 rawtext,
2822 transaction,
2834 transaction,
2823 link,
2835 link,
2824 p1,
2836 p1,
2825 p2,
2837 p2,
2826 flags,
2838 flags,
2827 cachedelta,
2839 cachedelta,
2828 deltacomputer=deltacomputer,
2840 deltacomputer=deltacomputer,
2829 sidedata=sidedata,
2841 sidedata=sidedata,
2830 )
2842 )
2831
2843
2832 def compress(self, data):
2844 def compress(self, data):
2833 """Generate a possibly-compressed representation of data."""
2845 """Generate a possibly-compressed representation of data."""
2834 if not data:
2846 if not data:
2835 return b'', data
2847 return b'', data
2836
2848
2837 compressed = self._compressor.compress(data)
2849 compressed = self._compressor.compress(data)
2838
2850
2839 if compressed:
2851 if compressed:
2840 # The revlog compressor added the header in the returned data.
2852 # The revlog compressor added the header in the returned data.
2841 return b'', compressed
2853 return b'', compressed
2842
2854
2843 if data[0:1] == b'\0':
2855 if data[0:1] == b'\0':
2844 return b'', data
2856 return b'', data
2845 return b'u', data
2857 return b'u', data
2846
2858
2847 def decompress(self, data):
2859 def decompress(self, data):
2848 """Decompress a revlog chunk.
2860 """Decompress a revlog chunk.
2849
2861
2850 The chunk is expected to begin with a header identifying the
2862 The chunk is expected to begin with a header identifying the
2851 format type so it can be routed to an appropriate decompressor.
2863 format type so it can be routed to an appropriate decompressor.
2852 """
2864 """
2853 if not data:
2865 if not data:
2854 return data
2866 return data
2855
2867
2856 # Revlogs are read much more frequently than they are written and many
2868 # Revlogs are read much more frequently than they are written and many
2857 # chunks only take microseconds to decompress, so performance is
2869 # chunks only take microseconds to decompress, so performance is
2858 # important here.
2870 # important here.
2859 #
2871 #
2860 # We can make a few assumptions about revlogs:
2872 # We can make a few assumptions about revlogs:
2861 #
2873 #
2862 # 1) the majority of chunks will be compressed (as opposed to inline
2874 # 1) the majority of chunks will be compressed (as opposed to inline
2863 # raw data).
2875 # raw data).
2864 # 2) decompressing *any* data will likely be at least 10x slower than
2876 # 2) decompressing *any* data will likely be at least 10x slower than
2865 # returning raw inline data.
2877 # returning raw inline data.
2866 # 3) we want to prioritize common and officially supported compression
2878 # 3) we want to prioritize common and officially supported compression
2867 # engines
2879 # engines
2868 #
2880 #
2869 # It follows that we want to optimize for "decompress compressed data
2881 # It follows that we want to optimize for "decompress compressed data
2870 # when encoded with common and officially supported compression engines"
2882 # when encoded with common and officially supported compression engines"
2871 # case over "raw data" and "data encoded by less common or non-official
2883 # case over "raw data" and "data encoded by less common or non-official
2872 # compression engines." That is why we have the inline lookup first
2884 # compression engines." That is why we have the inline lookup first
2873 # followed by the compengines lookup.
2885 # followed by the compengines lookup.
2874 #
2886 #
2875 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2887 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2876 # compressed chunks. And this matters for changelog and manifest reads.
2888 # compressed chunks. And this matters for changelog and manifest reads.
2877 t = data[0:1]
2889 t = data[0:1]
2878
2890
2879 if t == b'x':
2891 if t == b'x':
2880 try:
2892 try:
2881 return _zlibdecompress(data)
2893 return _zlibdecompress(data)
2882 except zlib.error as e:
2894 except zlib.error as e:
2883 raise error.RevlogError(
2895 raise error.RevlogError(
2884 _(b'revlog decompress error: %s')
2896 _(b'revlog decompress error: %s')
2885 % stringutil.forcebytestr(e)
2897 % stringutil.forcebytestr(e)
2886 )
2898 )
2887 # '\0' is more common than 'u' so it goes first.
2899 # '\0' is more common than 'u' so it goes first.
2888 elif t == b'\0':
2900 elif t == b'\0':
2889 return data
2901 return data
2890 elif t == b'u':
2902 elif t == b'u':
2891 return util.buffer(data, 1)
2903 return util.buffer(data, 1)
2892
2904
2893 compressor = self._get_decompressor(t)
2905 compressor = self._get_decompressor(t)
2894
2906
2895 return compressor.decompress(data)
2907 return compressor.decompress(data)
2896
2908
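# Illustrative sketch of the one-byte chunk header scheme the
# compress/decompress pair above uses: zlib chunks start with b'x' (the
# first byte of a standard zlib stream), b'u' marks data stored
# verbatim, and data beginning with b'\0' is stored as-is since that
# byte cannot start a supported compressed stream. A hypothetical,
# self-contained round-trip (toy code, not the revlog implementation):
import zlib

def toy_compress(data):
    if not data:
        return b'', data
    comp = zlib.compress(data)
    if len(comp) < len(data):
        return b'', comp  # zlib output already begins with b'x'
    if data[:1] == b'\0':
        return b'', data  # a leading NUL doubles as a "raw data" marker
    return b'u', data  # explicit "uncompressed" marker

def toy_decompress(blob):
    t = blob[:1]
    if t == b'x':
        return zlib.decompress(blob)
    if t == b'\0' or not blob:
        return blob
    if t == b'u':
        return blob[1:]
    raise ValueError('unknown chunk header: %r' % t)

for payload in (b'', b'\0raw bytes', b'short', b'abcd' * 256):
    header, stored = toy_compress(payload)
    assert toy_decompress(header + stored) == payload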
2897 def _addrevision(
2909 def _addrevision(
2898 self,
2910 self,
2899 node,
2911 node,
2900 rawtext,
2912 rawtext,
2901 transaction,
2913 transaction,
2902 link,
2914 link,
2903 p1,
2915 p1,
2904 p2,
2916 p2,
2905 flags,
2917 flags,
2906 cachedelta,
2918 cachedelta,
2907 alwayscache=False,
2919 alwayscache=False,
2908 deltacomputer=None,
2920 deltacomputer=None,
2909 sidedata=None,
2921 sidedata=None,
2910 ):
2922 ):
2911 """internal function to add revisions to the log
2923 """internal function to add revisions to the log
2912
2924
2913 see addrevision for argument descriptions.
2925 see addrevision for argument descriptions.
2914
2926
2915 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2927 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2916
2928
2917 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2929 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2918 be used.
2930 be used.
2919
2931
2920 invariants:
2932 invariants:
2921 - rawtext is optional (can be None); if not set, cachedelta must be set.
2933 - rawtext is optional (can be None); if not set, cachedelta must be set.
2922 if both are set, they must correspond to each other.
2934 if both are set, they must correspond to each other.
2923 """
2935 """
2924 if node == self.nullid:
2936 if node == self.nullid:
2925 raise error.RevlogError(
2937 raise error.RevlogError(
2926 _(b"%s: attempt to add null revision") % self.display_id
2938 _(b"%s: attempt to add null revision") % self.display_id
2927 )
2939 )
2928 if (
2940 if (
2929 node == self.nodeconstants.wdirid
2941 node == self.nodeconstants.wdirid
2930 or node in self.nodeconstants.wdirfilenodeids
2942 or node in self.nodeconstants.wdirfilenodeids
2931 ):
2943 ):
2932 raise error.RevlogError(
2944 raise error.RevlogError(
2933 _(b"%s: attempt to add wdir revision") % self.display_id
2945 _(b"%s: attempt to add wdir revision") % self.display_id
2934 )
2946 )
2935 if self._inner._writinghandles is None:
2947 if self._inner._writinghandles is None:
2936 msg = b'adding revision outside `revlog._writing` context'
2948 msg = b'adding revision outside `revlog._writing` context'
2937 raise error.ProgrammingError(msg)
2949 raise error.ProgrammingError(msg)
2938
2950
2939 btext = [rawtext]
2951 btext = [rawtext]
2940
2952
2941 curr = len(self)
2953 curr = len(self)
2942 prev = curr - 1
2954 prev = curr - 1
2943
2955
2944 offset = self._get_data_offset(prev)
2956 offset = self._get_data_offset(prev)
2945
2957
2946 if self._concurrencychecker:
2958 if self._concurrencychecker:
2947 ifh, dfh, sdfh = self._inner._writinghandles
2959 ifh, dfh, sdfh = self._inner._writinghandles
2948 # XXX no checking for the sidedata file
2960 # XXX no checking for the sidedata file
2949 if self._inline:
2961 if self._inline:
2950 # offset is "as if" it were in the .d file, so we need to add on
2962 # offset is "as if" it were in the .d file, so we need to add on
2951 # the size of the entry metadata.
2963 # the size of the entry metadata.
2952 self._concurrencychecker(
2964 self._concurrencychecker(
2953 ifh, self._indexfile, offset + curr * self.index.entry_size
2965 ifh, self._indexfile, offset + curr * self.index.entry_size
2954 )
2966 )
2955 else:
2967 else:
2956 # Entries in the .i are a consistent size.
2968 # Entries in the .i are a consistent size.
2957 self._concurrencychecker(
2969 self._concurrencychecker(
2958 ifh, self._indexfile, curr * self.index.entry_size
2970 ifh, self._indexfile, curr * self.index.entry_size
2959 )
2971 )
2960 self._concurrencychecker(dfh, self._datafile, offset)
2972 self._concurrencychecker(dfh, self._datafile, offset)
2961
2973
2962 p1r, p2r = self.rev(p1), self.rev(p2)
2974 p1r, p2r = self.rev(p1), self.rev(p2)
2963
2975
2964 # full versions are inserted when the needed deltas
2976 # full versions are inserted when the needed deltas
2965 # become comparable to the uncompressed text
2977 # become comparable to the uncompressed text
2966 if rawtext is None:
2978 if rawtext is None:
2967 # need rawtext size, before changed by flag processors, which is
2979 # need rawtext size, before changed by flag processors, which is
2968 # the non-raw size. use revlog explicitly to avoid filelog's extra
2980 # the non-raw size. use revlog explicitly to avoid filelog's extra
2969 # logic that might remove metadata size.
2981 # logic that might remove metadata size.
2970 textlen = mdiff.patchedsize(
2982 textlen = mdiff.patchedsize(
2971 revlog.size(self, cachedelta[0]), cachedelta[1]
2983 revlog.size(self, cachedelta[0]), cachedelta[1]
2972 )
2984 )
2973 else:
2985 else:
2974 textlen = len(rawtext)
2986 textlen = len(rawtext)
2975
2987
2976 if deltacomputer is None:
2988 if deltacomputer is None:
2977 write_debug = None
2989 write_debug = None
2978 if self.delta_config.debug_delta:
2990 if self.delta_config.debug_delta:
2979 write_debug = transaction._report
2991 write_debug = transaction._report
2980 deltacomputer = deltautil.deltacomputer(
2992 deltacomputer = deltautil.deltacomputer(
2981 self, write_debug=write_debug
2993 self, write_debug=write_debug
2982 )
2994 )
2983
2995
2984 if cachedelta is not None and len(cachedelta) == 2:
2996 if cachedelta is not None and len(cachedelta) == 2:
2985 # If the cached delta has no information about how it should be
2997 # If the cached delta has no information about how it should be
2986 # reused, add the default reuse instruction according to the
2998 # reused, add the default reuse instruction according to the
2987 # revlog's configuration.
2999 # revlog's configuration.
2988 if (
3000 if (
2989 self.delta_config.general_delta
3001 self.delta_config.general_delta
2990 and self.delta_config.lazy_delta_base
3002 and self.delta_config.lazy_delta_base
2991 ):
3003 ):
2992 delta_base_reuse = DELTA_BASE_REUSE_TRY
3004 delta_base_reuse = DELTA_BASE_REUSE_TRY
2993 else:
3005 else:
2994 delta_base_reuse = DELTA_BASE_REUSE_NO
3006 delta_base_reuse = DELTA_BASE_REUSE_NO
2995 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3007 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2996
3008
2997 revinfo = revlogutils.revisioninfo(
3009 revinfo = revlogutils.revisioninfo(
2998 node,
3010 node,
2999 p1,
3011 p1,
3000 p2,
3012 p2,
3001 btext,
3013 btext,
3002 textlen,
3014 textlen,
3003 cachedelta,
3015 cachedelta,
3004 flags,
3016 flags,
3005 )
3017 )
3006
3018
3007 deltainfo = deltacomputer.finddeltainfo(revinfo)
3019 deltainfo = deltacomputer.finddeltainfo(revinfo)
3008
3020
3009 compression_mode = COMP_MODE_INLINE
3021 compression_mode = COMP_MODE_INLINE
3010 if self._docket is not None:
3022 if self._docket is not None:
3011 default_comp = self._docket.default_compression_header
3023 default_comp = self._docket.default_compression_header
3012 r = deltautil.delta_compression(default_comp, deltainfo)
3024 r = deltautil.delta_compression(default_comp, deltainfo)
3013 compression_mode, deltainfo = r
3025 compression_mode, deltainfo = r
3014
3026
3015 sidedata_compression_mode = COMP_MODE_INLINE
3027 sidedata_compression_mode = COMP_MODE_INLINE
3016 if sidedata and self.feature_config.has_side_data:
3028 if sidedata and self.feature_config.has_side_data:
3017 sidedata_compression_mode = COMP_MODE_PLAIN
3029 sidedata_compression_mode = COMP_MODE_PLAIN
3018 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3030 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3019 sidedata_offset = self._docket.sidedata_end
3031 sidedata_offset = self._docket.sidedata_end
3020 h, comp_sidedata = self.compress(serialized_sidedata)
3032 h, comp_sidedata = self.compress(serialized_sidedata)
3021 if (
3033 if (
3022 h != b'u'
3034 h != b'u'
3023 and comp_sidedata[0:1] != b'\0'
3035 and comp_sidedata[0:1] != b'\0'
3024 and len(comp_sidedata) < len(serialized_sidedata)
3036 and len(comp_sidedata) < len(serialized_sidedata)
3025 ):
3037 ):
3026 assert not h
3038 assert not h
3027 if (
3039 if (
3028 comp_sidedata[0:1]
3040 comp_sidedata[0:1]
3029 == self._docket.default_compression_header
3041 == self._docket.default_compression_header
3030 ):
3042 ):
3031 sidedata_compression_mode = COMP_MODE_DEFAULT
3043 sidedata_compression_mode = COMP_MODE_DEFAULT
3032 serialized_sidedata = comp_sidedata
3044 serialized_sidedata = comp_sidedata
3033 else:
3045 else:
3034 sidedata_compression_mode = COMP_MODE_INLINE
3046 sidedata_compression_mode = COMP_MODE_INLINE
3035 serialized_sidedata = comp_sidedata
3047 serialized_sidedata = comp_sidedata
3036 else:
3048 else:
3037 serialized_sidedata = b""
3049 serialized_sidedata = b""
3038 # Don't store the offset if the sidedata is empty, that way
3050 # Don't store the offset if the sidedata is empty, that way
3039 # we can easily detect empty sidedata, and it will be no different
3051 # we can easily detect empty sidedata, and it will be no different
3040 # from sidedata we add manually.
3052 # from sidedata we add manually.
3041 sidedata_offset = 0
3053 sidedata_offset = 0
3042
3054
3043 rank = RANK_UNKNOWN
3055 rank = RANK_UNKNOWN
3044 if self.feature_config.compute_rank:
3056 if self.feature_config.compute_rank:
3045 if (p1r, p2r) == (nullrev, nullrev):
3057 if (p1r, p2r) == (nullrev, nullrev):
3046 rank = 1
3058 rank = 1
3047 elif p1r != nullrev and p2r == nullrev:
3059 elif p1r != nullrev and p2r == nullrev:
3048 rank = 1 + self.fast_rank(p1r)
3060 rank = 1 + self.fast_rank(p1r)
3049 elif p1r == nullrev and p2r != nullrev:
3061 elif p1r == nullrev and p2r != nullrev:
3050 rank = 1 + self.fast_rank(p2r)
3062 rank = 1 + self.fast_rank(p2r)
3051 else: # merge node
3063 else: # merge node
3052 if rustdagop is not None and self.index.rust_ext_compat:
3064 if rustdagop is not None and self.index.rust_ext_compat:
3053 rank = rustdagop.rank(self.index, p1r, p2r)
3065 rank = rustdagop.rank(self.index, p1r, p2r)
3054 else:
3066 else:
3055 pmin, pmax = sorted((p1r, p2r))
3067 pmin, pmax = sorted((p1r, p2r))
3056 rank = 1 + self.fast_rank(pmax)
3068 rank = 1 + self.fast_rank(pmax)
3057 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
3069 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
3058
3070
3059 e = revlogutils.entry(
3071 e = revlogutils.entry(
3060 flags=flags,
3072 flags=flags,
3061 data_offset=offset,
3073 data_offset=offset,
3062 data_compressed_length=deltainfo.deltalen,
3074 data_compressed_length=deltainfo.deltalen,
3063 data_uncompressed_length=textlen,
3075 data_uncompressed_length=textlen,
3064 data_compression_mode=compression_mode,
3076 data_compression_mode=compression_mode,
3065 data_delta_base=deltainfo.base,
3077 data_delta_base=deltainfo.base,
3066 link_rev=link,
3078 link_rev=link,
3067 parent_rev_1=p1r,
3079 parent_rev_1=p1r,
3068 parent_rev_2=p2r,
3080 parent_rev_2=p2r,
3069 node_id=node,
3081 node_id=node,
3070 sidedata_offset=sidedata_offset,
3082 sidedata_offset=sidedata_offset,
3071 sidedata_compressed_length=len(serialized_sidedata),
3083 sidedata_compressed_length=len(serialized_sidedata),
3072 sidedata_compression_mode=sidedata_compression_mode,
3084 sidedata_compression_mode=sidedata_compression_mode,
3073 rank=rank,
3085 rank=rank,
3074 )
3086 )
3075
3087
3076 self.index.append(e)
3088 self.index.append(e)
3077 entry = self.index.entry_binary(curr)
3089 entry = self.index.entry_binary(curr)
3078 if curr == 0 and self._docket is None:
3090 if curr == 0 and self._docket is None:
3079 header = self._format_flags | self._format_version
3091 header = self._format_flags | self._format_version
3080 header = self.index.pack_header(header)
3092 header = self.index.pack_header(header)
3081 entry = header + entry
3093 entry = header + entry
3082 self._writeentry(
3094 self._writeentry(
3083 transaction,
3095 transaction,
3084 entry,
3096 entry,
3085 deltainfo.data,
3097 deltainfo.data,
3086 link,
3098 link,
3087 offset,
3099 offset,
3088 serialized_sidedata,
3100 serialized_sidedata,
3089 sidedata_offset,
3101 sidedata_offset,
3090 )
3102 )
3091
3103
3092 rawtext = btext[0]
3104 rawtext = btext[0]
3093
3105
3094 if alwayscache and rawtext is None:
3106 if alwayscache and rawtext is None:
3095 rawtext = deltacomputer.buildtext(revinfo)
3107 rawtext = deltacomputer.buildtext(revinfo)
3096
3108
3097 if type(rawtext) == bytes: # only accept immutable objects
3109 if type(rawtext) == bytes: # only accept immutable objects
3098 self._revisioncache = (node, curr, rawtext)
3110 self._revisioncache = (node, curr, rawtext)
3099 self._chainbasecache[curr] = deltainfo.chainbase
3111 self._chainbasecache[curr] = deltainfo.chainbase
3100 return curr
3112 return curr
3101
3113
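# Illustrative sketch of what the rank computed above represents: the
# rank of a revision is the size of its ancestor set, itself included,
# which is why the merge case adds the ancestors of pmin that pmax does
# not already account for. A hypothetical DAG walk over a toy graph
# (`parents` is a plain dict, not the revlog index):
def toy_ancestors(parents, rev):
    seen, stack = set(), [rev]
    while stack:
        r = stack.pop()
        if r not in seen:
            seen.add(r)
            stack.extend(parents[r])
    return seen

def toy_rank(parents, rev):
    return len(toy_ancestors(parents, rev))

# 0 -- 1 -- 3  (3 merges the branches ending at 1 and 2)
#  \-- 2 --/
parents = {0: (), 1: (0,), 2: (0,), 3: (1, 2)}
assert toy_rank(parents, 3) == 4  # ancestors of 3: {0, 1, 2, 3}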
3102 def _get_data_offset(self, prev):
3114 def _get_data_offset(self, prev):
3103 """Returns the current offset in the (in-transaction) data file.
3115 """Returns the current offset in the (in-transaction) data file.
3104 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
3116 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
3105 file to store that information: since sidedata can be rewritten to the
3117 file to store that information: since sidedata can be rewritten to the
3106 end of the data file within a transaction, you can have cases where, for
3118 end of the data file within a transaction, you can have cases where, for
3107 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3119 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3108 to `n - 1`'s sidedata being written after `n`'s data.
3120 to `n - 1`'s sidedata being written after `n`'s data.
3109
3121
3110 TODO cache this in a docket file before getting out of experimental."""
3122 TODO cache this in a docket file before getting out of experimental."""
3111 if self._docket is None:
3123 if self._docket is None:
3112 return self.end(prev)
3124 return self.end(prev)
3113 else:
3125 else:
3114 return self._docket.data_end
3126 return self._docket.data_end
3115
3127
3116 def _writeentry(
3128 def _writeentry(
3117 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
3129 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
3118 ):
3130 ):
3119 # Files opened in a+ mode have inconsistent behavior on various
3131 # Files opened in a+ mode have inconsistent behavior on various
3120 # platforms. Windows requires that a file positioning call be made
3132 # platforms. Windows requires that a file positioning call be made
3121 # when the file handle transitions between reads and writes. See
3133 # when the file handle transitions between reads and writes. See
3122 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3134 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3123 # platforms, Python or the platform itself can be buggy. Some versions
3135 # platforms, Python or the platform itself can be buggy. Some versions
3124 # of Solaris have been observed to not append at the end of the file
3136 # of Solaris have been observed to not append at the end of the file
3125 # if the file was seeked to before the end. See issue4943 for more.
3137 # if the file was seeked to before the end. See issue4943 for more.
3126 #
3138 #
3127 # We work around this issue by inserting a seek() before writing.
3139 # We work around this issue by inserting a seek() before writing.
3128 # Note: This is likely not necessary on Python 3. However, because
3140 # Note: This is likely not necessary on Python 3. However, because
3129 # the file handle is reused for reads and may be seeked there, we need
3141 # the file handle is reused for reads and may be seeked there, we need
3130 # to be careful before changing this.
3142 # to be careful before changing this.
3131 if self._inner._writinghandles is None:
3143 if self._inner._writinghandles is None:
3132 msg = b'adding revision outside `revlog._writing` context'
3144 msg = b'adding revision outside `revlog._writing` context'
3133 raise error.ProgrammingError(msg)
3145 raise error.ProgrammingError(msg)
3134 ifh, dfh, sdfh = self._inner._writinghandles
3146 ifh, dfh, sdfh = self._inner._writinghandles
3135 if self._docket is None:
3147 if self._docket is None:
3136 ifh.seek(0, os.SEEK_END)
3148 ifh.seek(0, os.SEEK_END)
3137 else:
3149 else:
3138 ifh.seek(self._docket.index_end, os.SEEK_SET)
3150 ifh.seek(self._docket.index_end, os.SEEK_SET)
3139 if dfh:
3151 if dfh:
3140 if self._docket is None:
3152 if self._docket is None:
3141 dfh.seek(0, os.SEEK_END)
3153 dfh.seek(0, os.SEEK_END)
3142 else:
3154 else:
3143 dfh.seek(self._docket.data_end, os.SEEK_SET)
3155 dfh.seek(self._docket.data_end, os.SEEK_SET)
3144 if sdfh:
3156 if sdfh:
3145 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3157 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3146
3158
3147 curr = len(self) - 1
3159 curr = len(self) - 1
3148 if not self._inline:
3160 if not self._inline:
3149 transaction.add(self._datafile, offset)
3161 transaction.add(self._datafile, offset)
3150 if self._sidedatafile:
3162 if self._sidedatafile:
3151 transaction.add(self._sidedatafile, sidedata_offset)
3163 transaction.add(self._sidedatafile, sidedata_offset)
3152 transaction.add(self._indexfile, curr * len(entry))
3164 transaction.add(self._indexfile, curr * len(entry))
3153 if data[0]:
3165 if data[0]:
3154 dfh.write(data[0])
3166 dfh.write(data[0])
3155 dfh.write(data[1])
3167 dfh.write(data[1])
3156 if sidedata:
3168 if sidedata:
3157 sdfh.write(sidedata)
3169 sdfh.write(sidedata)
3158 ifh.write(entry)
3170 ifh.write(entry)
3159 else:
3171 else:
3160 offset += curr * self.index.entry_size
3172 offset += curr * self.index.entry_size
3161 transaction.add(self._indexfile, offset)
3173 transaction.add(self._indexfile, offset)
3162 ifh.write(entry)
3174 ifh.write(entry)
3163 ifh.write(data[0])
3175 ifh.write(data[0])
3164 ifh.write(data[1])
3176 ifh.write(data[1])
3165 assert not sidedata
3177 assert not sidedata
3166 self._enforceinlinesize(transaction)
3178 self._enforceinlinesize(transaction)
3167 if self._docket is not None:
3179 if self._docket is not None:
3168 # revlog-v2 always has 3 writing handles, help Pytype
3180 # revlog-v2 always has 3 writing handles, help Pytype
3169 wh1 = self._inner._writinghandles[0]
3181 wh1 = self._inner._writinghandles[0]
3170 wh2 = self._inner._writinghandles[1]
3182 wh2 = self._inner._writinghandles[1]
3171 wh3 = self._inner._writinghandles[2]
3183 wh3 = self._inner._writinghandles[2]
3172 assert wh1 is not None
3184 assert wh1 is not None
3173 assert wh2 is not None
3185 assert wh2 is not None
3174 assert wh3 is not None
3186 assert wh3 is not None
3175 self._docket.index_end = wh1.tell()
3187 self._docket.index_end = wh1.tell()
3176 self._docket.data_end = wh2.tell()
3188 self._docket.data_end = wh2.tell()
3177 self._docket.sidedata_end = wh3.tell()
3189 self._docket.sidedata_end = wh3.tell()
3178
3190
3179 nodemaputil.setup_persistent_nodemap(transaction, self)
3191 nodemaputil.setup_persistent_nodemap(transaction, self)
3180
3192
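# Illustrative sketch of the positioning hazard described at the top of
# _writeentry: a handle shared between reads and writes may be left
# positioned mid-file by a read, so the code explicitly seeks before
# each write rather than trusting append semantics. Hypothetical
# stand-alone form:
import os
import tempfile

with tempfile.TemporaryFile() as fh:
    fh.write(b'0123456789')
    fh.seek(0)  # a read elsewhere moved the file position...
    fh.read(4)
    fh.seek(0, os.SEEK_END)  # ...so re-seek to the end before appending
    fh.write(b'-tail')
    fh.seek(0)
    assert fh.read() == b'0123456789-tail'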
3181 def addgroup(
3193 def addgroup(
3182 self,
3194 self,
3183 deltas,
3195 deltas,
3184 linkmapper,
3196 linkmapper,
3185 transaction,
3197 transaction,
3186 alwayscache=False,
3198 alwayscache=False,
3187 addrevisioncb=None,
3199 addrevisioncb=None,
3188 duplicaterevisioncb=None,
3200 duplicaterevisioncb=None,
3189 debug_info=None,
3201 debug_info=None,
3190 delta_base_reuse_policy=None,
3202 delta_base_reuse_policy=None,
3191 ):
3203 ):
3192 """
3204 """
3193 add a delta group
3205 add a delta group
3194
3206
3195 Given a set of deltas, add them to the revision log. The
3207 Given a set of deltas, add them to the revision log. The
3196 first delta is against its parent, which should be in our
3208 first delta is against its parent, which should be in our
3197 log; the rest are against the previous delta.
3209 log; the rest are against the previous delta.
3198
3210
3199 If ``addrevisioncb`` is defined, it will be called with arguments of
3211 If ``addrevisioncb`` is defined, it will be called with arguments of
3200 this revlog and the node that was added.
3212 this revlog and the node that was added.
3201 """
3213 """
3202
3214
3203 if self._adding_group:
3215 if self._adding_group:
3204 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3216 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3205
3217
3206 # read the default delta-base reuse policy from revlog config if the
3218 # read the default delta-base reuse policy from revlog config if the
3207 # group did not specify one.
3219 # group did not specify one.
3208 if delta_base_reuse_policy is None:
3220 if delta_base_reuse_policy is None:
3209 if (
3221 if (
3210 self.delta_config.general_delta
3222 self.delta_config.general_delta
3211 and self.delta_config.lazy_delta_base
3223 and self.delta_config.lazy_delta_base
3212 ):
3224 ):
3213 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3225 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3214 else:
3226 else:
3215 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3227 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3216
3228
3217 self._adding_group = True
3229 self._adding_group = True
3218 empty = True
3230 empty = True
3219 try:
3231 try:
3220 with self._writing(transaction):
3232 with self._writing(transaction):
3221 write_debug = None
3233 write_debug = None
3222 if self.delta_config.debug_delta:
3234 if self.delta_config.debug_delta:
3223 write_debug = transaction._report
3235 write_debug = transaction._report
3224 deltacomputer = deltautil.deltacomputer(
3236 deltacomputer = deltautil.deltacomputer(
3225 self,
3237 self,
3226 write_debug=write_debug,
3238 write_debug=write_debug,
3227 debug_info=debug_info,
3239 debug_info=debug_info,
3228 )
3240 )
3229 # loop through our set of deltas
3241 # loop through our set of deltas
3230 for data in deltas:
3242 for data in deltas:
3231 (
3243 (
3232 node,
3244 node,
3233 p1,
3245 p1,
3234 p2,
3246 p2,
3235 linknode,
3247 linknode,
3236 deltabase,
3248 deltabase,
3237 delta,
3249 delta,
3238 flags,
3250 flags,
3239 sidedata,
3251 sidedata,
3240 ) = data
3252 ) = data
3241 link = linkmapper(linknode)
3253 link = linkmapper(linknode)
3242 flags = flags or REVIDX_DEFAULT_FLAGS
3254 flags = flags or REVIDX_DEFAULT_FLAGS
3243
3255
3244 rev = self.index.get_rev(node)
3256 rev = self.index.get_rev(node)
3245 if rev is not None:
3257 if rev is not None:
3246 # this can happen if two branches make the same change
3258 # this can happen if two branches make the same change
3247 self._nodeduplicatecallback(transaction, rev)
3259 self._nodeduplicatecallback(transaction, rev)
3248 if duplicaterevisioncb:
3260 if duplicaterevisioncb:
3249 duplicaterevisioncb(self, rev)
3261 duplicaterevisioncb(self, rev)
3250 empty = False
3262 empty = False
3251 continue
3263 continue
3252
3264
3253 for p in (p1, p2):
3265 for p in (p1, p2):
3254 if not self.index.has_node(p):
3266 if not self.index.has_node(p):
3255 raise error.LookupError(
3267 raise error.LookupError(
3256 p, self.radix, _(b'unknown parent')
3268 p, self.radix, _(b'unknown parent')
3257 )
3269 )
3258
3270
3259 if not self.index.has_node(deltabase):
3271 if not self.index.has_node(deltabase):
3260 raise error.LookupError(
3272 raise error.LookupError(
3261 deltabase, self.display_id, _(b'unknown delta base')
3273 deltabase, self.display_id, _(b'unknown delta base')
3262 )
3274 )
3263
3275
3264 baserev = self.rev(deltabase)
3276 baserev = self.rev(deltabase)
3265
3277
3266 if baserev != nullrev and self.iscensored(baserev):
3278 if baserev != nullrev and self.iscensored(baserev):
3267 # if base is censored, delta must be a full replacement in a
3279 # if base is censored, delta must be a full replacement in a
3268 # single patch operation
3280 # single patch operation
3269 hlen = struct.calcsize(b">lll")
3281 hlen = struct.calcsize(b">lll")
3270 oldlen = self.rawsize(baserev)
3282 oldlen = self.rawsize(baserev)
3271 newlen = len(delta) - hlen
3283 newlen = len(delta) - hlen
3272 if delta[:hlen] != mdiff.replacediffheader(
3284 if delta[:hlen] != mdiff.replacediffheader(
3273 oldlen, newlen
3285 oldlen, newlen
3274 ):
3286 ):
3275 raise error.CensoredBaseError(
3287 raise error.CensoredBaseError(
3276 self.display_id, self.node(baserev)
3288 self.display_id, self.node(baserev)
3277 )
3289 )
3278
3290
3279 if not flags and self._peek_iscensored(baserev, delta):
3291 if not flags and self._peek_iscensored(baserev, delta):
3280 flags |= REVIDX_ISCENSORED
3292 flags |= REVIDX_ISCENSORED
3281
3293
3282 # We assume consumers of addrevisioncb will want to retrieve
3294 # We assume consumers of addrevisioncb will want to retrieve
3283 # the added revision, which will require a call to
3295 # the added revision, which will require a call to
3284 # revision(). revision() will fast path if there is a cache
3296 # revision(). revision() will fast path if there is a cache
3285 # hit. So, we tell _addrevision() to always cache in this case.
3297 # hit. So, we tell _addrevision() to always cache in this case.
3286 # We're only using addgroup() in the context of changegroup
3298 # We're only using addgroup() in the context of changegroup
3287 # generation so the revision data can always be handled as raw
3299 # generation so the revision data can always be handled as raw
3288 # by the flagprocessor.
3300 # by the flagprocessor.
3289 rev = self._addrevision(
3301 rev = self._addrevision(
3290 node,
3302 node,
3291 None,
3303 None,
3292 transaction,
3304 transaction,
3293 link,
3305 link,
3294 p1,
3306 p1,
3295 p2,
3307 p2,
3296 flags,
3308 flags,
3297 (baserev, delta, delta_base_reuse_policy),
3309 (baserev, delta, delta_base_reuse_policy),
3298 alwayscache=alwayscache,
3310 alwayscache=alwayscache,
3299 deltacomputer=deltacomputer,
3311 deltacomputer=deltacomputer,
3300 sidedata=sidedata,
3312 sidedata=sidedata,
3301 )
3313 )
3302
3314
3303 if addrevisioncb:
3315 if addrevisioncb:
3304 addrevisioncb(self, rev)
3316 addrevisioncb(self, rev)
3305 empty = False
3317 empty = False
3306 finally:
3318 finally:
3307 self._adding_group = False
3319 self._adding_group = False
3308 return not empty
3320 return not empty
3309
3321
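# Illustrative sketch of the shape `addgroup` consumes: each element of
# `deltas` is an 8-tuple, unpacked exactly as in the loop above. A
# hypothetical helper (not part of the revlog API) that summarizes a
# group without applying it:
def describe_delta_group(deltas):
    for node, p1, p2, linknode, deltabase, delta, flags, sidedata in deltas:
        yield {
            'node': node,  # id of the revision being added
            'parents': (p1, p2),
            'linknode': linknode,  # changeset this revision belongs to
            'deltabase': deltabase,  # node the delta applies against
            'deltalen': len(delta),
            'flags': flags,
            'has_sidedata': bool(sidedata),
        }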
3310 def iscensored(self, rev):
3322 def iscensored(self, rev):
3311 """Check if a file revision is censored."""
3323 """Check if a file revision is censored."""
3312 if not self.feature_config.censorable:
3324 if not self.feature_config.censorable:
3313 return False
3325 return False
3314
3326
3315 return self.flags(rev) & REVIDX_ISCENSORED
3327 return self.flags(rev) & REVIDX_ISCENSORED
3316
3328
3317 def _peek_iscensored(self, baserev, delta):
3329 def _peek_iscensored(self, baserev, delta):
3318 """Quickly check if a delta produces a censored revision."""
3330 """Quickly check if a delta produces a censored revision."""
3319 if not self.feature_config.censorable:
3331 if not self.feature_config.censorable:
3320 return False
3332 return False
3321
3333
3322 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3334 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3323
3335
3324 def getstrippoint(self, minlink):
3336 def getstrippoint(self, minlink):
3325 """find the minimum rev that must be stripped to strip the linkrev
3337 """find the minimum rev that must be stripped to strip the linkrev
3326
3338
3327 Returns a tuple containing the minimum rev and a set of all revs that
3339 Returns a tuple containing the minimum rev and a set of all revs that
3328 have linkrevs that will be broken by this strip.
3340 have linkrevs that will be broken by this strip.
3329 """
3341 """
3330 return storageutil.resolvestripinfo(
3342 return storageutil.resolvestripinfo(
3331 minlink,
3343 minlink,
3332 len(self) - 1,
3344 len(self) - 1,
3333 self.headrevs(),
3345 self.headrevs(),
3334 self.linkrev,
3346 self.linkrev,
3335 self.parentrevs,
3347 self.parentrevs,
3336 )
3348 )
3337
3349
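# Illustrative sketch of consuming getstrippoint(): every revision from
# the returned rev onward gets truncated, while the returned set lists
# revisions below that point whose linkrevs the strip would still
# invalidate. A hypothetical wrapper:
def plan_strip(rl, minlink):
    rev, broken = rl.getstrippoint(minlink)
    return {
        'first_removed_rev': rev,
        'kept_but_broken_linkrevs': sorted(broken),
    }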
3338 def strip(self, minlink, transaction):
3350 def strip(self, minlink, transaction):
3339 """truncate the revlog on the first revision with a linkrev >= minlink
3351 """truncate the revlog on the first revision with a linkrev >= minlink
3340
3352
3341 This function is called when we're stripping revision minlink and
3353 This function is called when we're stripping revision minlink and
3342 its descendants from the repository.
3354 its descendants from the repository.
3343
3355
3344 We have to remove all revisions with linkrev >= minlink, because
3356 We have to remove all revisions with linkrev >= minlink, because
3345 the equivalent changelog revisions will be renumbered after the
3357 the equivalent changelog revisions will be renumbered after the
3346 strip.
3358 strip.
3347
3359
3348 So we truncate the revlog on the first of these revisions, and
3360 So we truncate the revlog on the first of these revisions, and
3349 trust that the caller has saved the revisions that shouldn't be
3361 trust that the caller has saved the revisions that shouldn't be
3350 removed and that it'll re-add them after this truncation.
3362 removed and that it'll re-add them after this truncation.
3351 """
3363 """
3352 if len(self) == 0:
3364 if len(self) == 0:
3353 return
3365 return
3354
3366
3355 rev, _ = self.getstrippoint(minlink)
3367 rev, _ = self.getstrippoint(minlink)
3356 if rev == len(self):
3368 if rev == len(self):
3357 return
3369 return
3358
3370
3359 # first truncate the files on disk
3371 # first truncate the files on disk
3360 data_end = self.start(rev)
3372 data_end = self.start(rev)
3361 if not self._inline:
3373 if not self._inline:
3362 transaction.add(self._datafile, data_end)
3374 transaction.add(self._datafile, data_end)
3363 end = rev * self.index.entry_size
3375 end = rev * self.index.entry_size
3364 else:
3376 else:
3365 end = data_end + (rev * self.index.entry_size)
3377 end = data_end + (rev * self.index.entry_size)
3366
3378
3367 if self._sidedatafile:
3379 if self._sidedatafile:
3368 sidedata_end = self.sidedata_cut_off(rev)
3380 sidedata_end = self.sidedata_cut_off(rev)
3369 transaction.add(self._sidedatafile, sidedata_end)
3381 transaction.add(self._sidedatafile, sidedata_end)
3370
3382
3371 transaction.add(self._indexfile, end)
3383 transaction.add(self._indexfile, end)
3372 if self._docket is not None:
3384 if self._docket is not None:
3373 # XXX we could leverage the docket while stripping. However, it is
3385 # XXX we could leverage the docket while stripping. However, it is
3374 # not powerful enough at the time of this comment
3386 # not powerful enough at the time of this comment
3375 self._docket.index_end = end
3387 self._docket.index_end = end
3376 self._docket.data_end = data_end
3388 self._docket.data_end = data_end
3377 self._docket.sidedata_end = sidedata_end
3389 self._docket.sidedata_end = sidedata_end
3378 self._docket.write(transaction, stripping=True)
3390 self._docket.write(transaction, stripping=True)
3379
3391
3380 # then reset internal state in memory to forget those revisions
3392 # then reset internal state in memory to forget those revisions
3381 self._revisioncache = None
3393 self._revisioncache = None
3382 self._chaininfocache = util.lrucachedict(500)
3394 self._chaininfocache = util.lrucachedict(500)
3383 self._inner._segmentfile.clear_cache()
3395 self._inner._segmentfile.clear_cache()
3384 self._inner._segmentfile_sidedata.clear_cache()
3396 self._inner._segmentfile_sidedata.clear_cache()
3385
3397
3386 del self.index[rev:-1]
3398 del self.index[rev:-1]
3387
3399
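# Illustrative sketch of the truncation offsets strip() computes above:
# a non-inline revlog cuts the index at a whole number of fixed-size
# entries, while an inline revlog interleaves data with index entries,
# so the cut point also includes the data written before `rev`.
# Hypothetical stand-alone form of the arithmetic:
def index_cut_point(rev, entry_size, data_end, inline):
    if inline:
        return data_end + rev * entry_size
    return rev * entry_size

assert index_cut_point(3, 64, 0, inline=False) == 192
assert index_cut_point(3, 64, 1000, inline=True) == 1192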
3388 def checksize(self):
3400 def checksize(self):
3389 """Check size of index and data files
3401 """Check size of index and data files
3390
3402
3391 return a (dd, di) tuple.
3403 return a (dd, di) tuple.
3392 - dd: extra bytes for the "data" file
3404 - dd: extra bytes for the "data" file
3393 - di: extra bytes for the "index" file
3405 - di: extra bytes for the "index" file
3394
3406
3395 A healthy revlog will return (0, 0).
3407 A healthy revlog will return (0, 0).
3396 """
3408 """
3397 expected = 0
3409 expected = 0
3398 if len(self):
3410 if len(self):
3399 expected = max(0, self.end(len(self) - 1))
3411 expected = max(0, self.end(len(self) - 1))
3400
3412
3401 try:
3413 try:
3402 with self._datafp() as f:
3414 with self._datafp() as f:
3403 f.seek(0, io.SEEK_END)
3415 f.seek(0, io.SEEK_END)
3404 actual = f.tell()
3416 actual = f.tell()
3405 dd = actual - expected
3417 dd = actual - expected
3406 except FileNotFoundError:
3418 except FileNotFoundError:
3407 dd = 0
3419 dd = 0
3408
3420
3409 try:
3421 try:
3410 f = self.opener(self._indexfile)
3422 f = self.opener(self._indexfile)
3411 f.seek(0, io.SEEK_END)
3423 f.seek(0, io.SEEK_END)
3412 actual = f.tell()
3424 actual = f.tell()
3413 f.close()
3425 f.close()
3414 s = self.index.entry_size
3426 s = self.index.entry_size
3415 i = max(0, actual // s)
3427 i = max(0, actual // s)
3416 di = actual - (i * s)
3428 di = actual - (i * s)
3417 if self._inline:
3429 if self._inline:
3418 databytes = 0
3430 databytes = 0
3419 for r in self:
3431 for r in self:
3420 databytes += max(0, self.length(r))
3432 databytes += max(0, self.length(r))
3421 dd = 0
3433 dd = 0
3422 di = actual - len(self) * s - databytes
3434 di = actual - len(self) * s - databytes
3423 except FileNotFoundError:
3435 except FileNotFoundError:
3424 di = 0
3436 di = 0
3425
3437
3426 return (dd, di)
3438 return (dd, di)
3427
3439
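# Illustrative sketch of interpreting the (dd, di) pair returned by
# checksize(): both values are byte deltas past the expected file ends,
# and a healthy revlog reports (0, 0). A hypothetical caller:
def is_revlog_size_healthy(rl):
    dd, di = rl.checksize()
    if dd:
        print('data file: %d unexpected bytes' % dd)
    if di:
        print('index file: %d unexpected bytes' % di)
    return dd == 0 and di == 0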
3428 def files(self):
3440 def files(self):
3429 """return list of files that compose this revlog"""
3441 """return list of files that compose this revlog"""
3430 res = [self._indexfile]
3442 res = [self._indexfile]
3431 if self._docket_file is None:
3443 if self._docket_file is None:
3432 if not self._inline:
3444 if not self._inline:
3433 res.append(self._datafile)
3445 res.append(self._datafile)
3434 else:
3446 else:
3435 res.append(self._docket_file)
3447 res.append(self._docket_file)
3436 res.extend(self._docket.old_index_filepaths(include_empty=False))
3448 res.extend(self._docket.old_index_filepaths(include_empty=False))
3437 if self._docket.data_end:
3449 if self._docket.data_end:
3438 res.append(self._datafile)
3450 res.append(self._datafile)
3439 res.extend(self._docket.old_data_filepaths(include_empty=False))
3451 res.extend(self._docket.old_data_filepaths(include_empty=False))
3440 if self._docket.sidedata_end:
3452 if self._docket.sidedata_end:
3441 res.append(self._sidedatafile)
3453 res.append(self._sidedatafile)
3442 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3454 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3443 return res
3455 return res
3444
3456
3445 def emitrevisions(
3457 def emitrevisions(
3446 self,
3458 self,
3447 nodes,
3459 nodes,
3448 nodesorder=None,
3460 nodesorder=None,
3449 revisiondata=False,
3461 revisiondata=False,
3450 assumehaveparentrevisions=False,
3462 assumehaveparentrevisions=False,
3451 deltamode=repository.CG_DELTAMODE_STD,
3463 deltamode=repository.CG_DELTAMODE_STD,
3452 sidedata_helpers=None,
3464 sidedata_helpers=None,
3453 debug_info=None,
3465 debug_info=None,
3454 ):
3466 ):
3455 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3467 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3456 raise error.ProgrammingError(
3468 raise error.ProgrammingError(
3457 b'unhandled value for nodesorder: %s' % nodesorder
3469 b'unhandled value for nodesorder: %s' % nodesorder
3458 )
3470 )
3459
3471
3460 if nodesorder is None and not self.delta_config.general_delta:
3472 if nodesorder is None and not self.delta_config.general_delta:
3461 nodesorder = b'storage'
3473 nodesorder = b'storage'
3462
3474
3463 if (
3475 if (
3464 not self._storedeltachains
3476 not self._storedeltachains
3465 and deltamode != repository.CG_DELTAMODE_PREV
3477 and deltamode != repository.CG_DELTAMODE_PREV
3466 ):
3478 ):
3467 deltamode = repository.CG_DELTAMODE_FULL
3479 deltamode = repository.CG_DELTAMODE_FULL
3468
3480
3469 return storageutil.emitrevisions(
3481 return storageutil.emitrevisions(
3470 self,
3482 self,
3471 nodes,
3483 nodes,
3472 nodesorder,
3484 nodesorder,
3473 revlogrevisiondelta,
3485 revlogrevisiondelta,
3474 deltaparentfn=self.deltaparent,
3486 deltaparentfn=self.deltaparent,
3475 candeltafn=self._candelta,
3487 candeltafn=self._candelta,
3476 rawsizefn=self.rawsize,
3488 rawsizefn=self.rawsize,
3477 revdifffn=self.revdiff,
3489 revdifffn=self.revdiff,
3478 flagsfn=self.flags,
3490 flagsfn=self.flags,
3479 deltamode=deltamode,
3491 deltamode=deltamode,
3480 revisiondata=revisiondata,
3492 revisiondata=revisiondata,
3481 assumehaveparentrevisions=assumehaveparentrevisions,
3493 assumehaveparentrevisions=assumehaveparentrevisions,
3482 sidedata_helpers=sidedata_helpers,
3494 sidedata_helpers=sidedata_helpers,
3483 debug_info=debug_info,
3495 debug_info=debug_info,
3484 )
3496 )
3485
3497
3486 DELTAREUSEALWAYS = b'always'
3498 DELTAREUSEALWAYS = b'always'
3487 DELTAREUSESAMEREVS = b'samerevs'
3499 DELTAREUSESAMEREVS = b'samerevs'
3488 DELTAREUSENEVER = b'never'
3500 DELTAREUSENEVER = b'never'
3489
3501
3490 DELTAREUSEFULLADD = b'fulladd'
3502 DELTAREUSEFULLADD = b'fulladd'
3491
3503
3492 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3504 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3493
3505
3494 def clone(
3506 def clone(
3495 self,
3507 self,
3496 tr,
3508 tr,
3497 destrevlog,
3509 destrevlog,
3498 addrevisioncb=None,
3510 addrevisioncb=None,
3499 deltareuse=DELTAREUSESAMEREVS,
3511 deltareuse=DELTAREUSESAMEREVS,
3500 forcedeltabothparents=None,
3512 forcedeltabothparents=None,
3501 sidedata_helpers=None,
3513 sidedata_helpers=None,
3502 ):
3514 ):
3503 """Copy this revlog to another, possibly with format changes.
3515 """Copy this revlog to another, possibly with format changes.
3504
3516
3505 The destination revlog will contain the same revisions and nodes.
3517 The destination revlog will contain the same revisions and nodes.
3506 However, it may not be bit-for-bit identical due to e.g. delta encoding
3518 However, it may not be bit-for-bit identical due to e.g. delta encoding
3507 differences.
3519 differences.
3508
3520
3509 The ``deltareuse`` argument controls how deltas from the existing revlog
3521 The ``deltareuse`` argument controls how deltas from the existing revlog
3510 are preserved in the destination revlog. The argument can have the
3522 are preserved in the destination revlog. The argument can have the
3511 following values:
3523 following values:
3512
3524
3513 DELTAREUSEALWAYS
3525 DELTAREUSEALWAYS
3514 Deltas will always be reused (if possible), even if the destination
3526 Deltas will always be reused (if possible), even if the destination
3515 revlog would not select the same revisions for the delta. This is the
3527 revlog would not select the same revisions for the delta. This is the
3516 fastest mode of operation.
3528 fastest mode of operation.
3517 DELTAREUSESAMEREVS
3529 DELTAREUSESAMEREVS
3518 Deltas will be reused if the destination revlog would pick the same
3530 Deltas will be reused if the destination revlog would pick the same
3519 revisions for the delta. This mode strikes a balance between speed
3531 revisions for the delta. This mode strikes a balance between speed
3520 and optimization.
3532 and optimization.
3521 DELTAREUSENEVER
3533 DELTAREUSENEVER
3522 Deltas will never be reused. This is the slowest mode of execution.
3534 Deltas will never be reused. This is the slowest mode of execution.
3523 This mode can be used to recompute deltas (e.g. if the diff/delta
3535 This mode can be used to recompute deltas (e.g. if the diff/delta
3524 algorithm changes).
3536 algorithm changes).
3525 DELTAREUSEFULLADD
3537 DELTAREUSEFULLADD
3526 Revisions will be re-added as if they were new content. This is
3538 Revisions will be re-added as if they were new content. This is
3527 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3539 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3528 e.g. large file detection and handling.
3540 e.g. large file detection and handling.
3529
3541
3530 Delta computation can be slow, so the choice of delta reuse policy can
3542 Delta computation can be slow, so the choice of delta reuse policy can
3531 significantly affect run time.
3543 significantly affect run time.
3532
3544
3533 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3545 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3534 two extremes. Deltas will be reused if they are appropriate. But if the
3546 two extremes. Deltas will be reused if they are appropriate. But if the
3535 delta could choose a better revision, it will do so. This means if you
3547 delta could choose a better revision, it will do so. This means if you
3536 are converting a non-generaldelta revlog to a generaldelta revlog,
3548 are converting a non-generaldelta revlog to a generaldelta revlog,
3537 deltas will be recomputed if the delta's parent isn't a parent of the
3549 deltas will be recomputed if the delta's parent isn't a parent of the
3538 revision.
3550 revision.
3539
3551
3540 In addition to the delta policy, the ``forcedeltabothparents``
3552 In addition to the delta policy, the ``forcedeltabothparents``
3541 argument controls whether to force compute deltas against both parents
3553 argument controls whether to force compute deltas against both parents
3542 for merges. When unset, the destination revlog's current setting is used.
3554 for merges. When unset, the destination revlog's current setting is used.
3543
3555
3544 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3556 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3545 `sidedata_helpers`.
3557 `sidedata_helpers`.
3546 """
3558 """
3547 if deltareuse not in self.DELTAREUSEALL:
3559 if deltareuse not in self.DELTAREUSEALL:
3548 raise ValueError(
3560 raise ValueError(
3549 _(b'value for deltareuse invalid: %s') % deltareuse
3561 _(b'value for deltareuse invalid: %s') % deltareuse
3550 )
3562 )
3551
3563
3552 if len(destrevlog):
3564 if len(destrevlog):
3553 raise ValueError(_(b'destination revlog is not empty'))
3565 raise ValueError(_(b'destination revlog is not empty'))
3554
3566
3555 if getattr(self, 'filteredrevs', None):
3567 if getattr(self, 'filteredrevs', None):
3556 raise ValueError(_(b'source revlog has filtered revisions'))
3568 raise ValueError(_(b'source revlog has filtered revisions'))
3557 if getattr(destrevlog, 'filteredrevs', None):
3569 if getattr(destrevlog, 'filteredrevs', None):
3558 raise ValueError(_(b'destination revlog has filtered revisions'))
3570 raise ValueError(_(b'destination revlog has filtered revisions'))
3559
3571
3560 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3572 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3561 # if possible.
3573 # if possible.
3562 old_delta_config = destrevlog.delta_config
3574 old_delta_config = destrevlog.delta_config
3563 destrevlog.delta_config = destrevlog.delta_config.copy()
3575 destrevlog.delta_config = destrevlog.delta_config.copy()
3564
3576
3565 try:
3577 try:
3566 if deltareuse == self.DELTAREUSEALWAYS:
3578 if deltareuse == self.DELTAREUSEALWAYS:
3567 destrevlog.delta_config.lazy_delta_base = True
3579 destrevlog.delta_config.lazy_delta_base = True
3568 destrevlog.delta_config.lazy_delta = True
3580 destrevlog.delta_config.lazy_delta = True
3569 elif deltareuse == self.DELTAREUSESAMEREVS:
3581 elif deltareuse == self.DELTAREUSESAMEREVS:
3570 destrevlog.delta_config.lazy_delta_base = False
3582 destrevlog.delta_config.lazy_delta_base = False
3571 destrevlog.delta_config.lazy_delta = True
3583 destrevlog.delta_config.lazy_delta = True
3572 elif deltareuse == self.DELTAREUSENEVER:
3584 elif deltareuse == self.DELTAREUSENEVER:
3573 destrevlog.delta_config.lazy_delta_base = False
3585 destrevlog.delta_config.lazy_delta_base = False
3574 destrevlog.delta_config.lazy_delta = False
3586 destrevlog.delta_config.lazy_delta = False
3575
3587
3576 delta_both_parents = (
3588 delta_both_parents = (
3577 forcedeltabothparents or old_delta_config.delta_both_parents
3589 forcedeltabothparents or old_delta_config.delta_both_parents
3578 )
3590 )
3579 destrevlog.delta_config.delta_both_parents = delta_both_parents
3591 destrevlog.delta_config.delta_both_parents = delta_both_parents
3580
3592
3581 with self.reading(), destrevlog._writing(tr):
3593 with self.reading(), destrevlog._writing(tr):
3582 self._clone(
3594 self._clone(
3583 tr,
3595 tr,
3584 destrevlog,
3596 destrevlog,
3585 addrevisioncb,
3597 addrevisioncb,
3586 deltareuse,
3598 deltareuse,
3587 forcedeltabothparents,
3599 forcedeltabothparents,
3588 sidedata_helpers,
3600 sidedata_helpers,
3589 )
3601 )
3590
3602
3591 finally:
3603 finally:
3592 destrevlog.delta_config = old_delta_config
3604 destrevlog.delta_config = old_delta_config
3593
3605
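# Illustrative sketch of picking a `deltareuse` policy for clone(); the
# constants are the class attributes defined above, and the trade-offs
# follow the clone() docstring. A hypothetical helper:
def pick_deltareuse(recompute_deltas=False, rerun_full_add=False):
    if rerun_full_add:
        return revlog.DELTAREUSEFULLADD  # slowest; re-runs full add logic
    if recompute_deltas:
        return revlog.DELTAREUSENEVER  # recompute every delta
    return revlog.DELTAREUSESAMEREVS  # the balanced default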
3594 def _clone(
3606 def _clone(
3595 self,
3607 self,
3596 tr,
3608 tr,
3597 destrevlog,
3609 destrevlog,
3598 addrevisioncb,
3610 addrevisioncb,
3599 deltareuse,
3611 deltareuse,
3600 forcedeltabothparents,
3612 forcedeltabothparents,
3601 sidedata_helpers,
3613 sidedata_helpers,
3602 ):
3614 ):
3603 """perform the core duty of `revlog.clone` after parameter processing"""
3615 """perform the core duty of `revlog.clone` after parameter processing"""
3604 write_debug = None
3616 write_debug = None
3605 if self.delta_config.debug_delta:
3617 if self.delta_config.debug_delta:
3606 write_debug = tr._report
3618 write_debug = tr._report
3607 deltacomputer = deltautil.deltacomputer(
3619 deltacomputer = deltautil.deltacomputer(
3608 destrevlog,
3620 destrevlog,
3609 write_debug=write_debug,
3621 write_debug=write_debug,
3610 )
3622 )
3611 index = self.index
3623 index = self.index
3612 for rev in self:
3624 for rev in self:
3613 entry = index[rev]
3625 entry = index[rev]
3614
3626
3615 # Some classes override linkrev to take filtered revs into
3627 # Some classes override linkrev to take filtered revs into
3616 # account. Use raw entry from index.
3628 # account. Use raw entry from index.
3617 flags = entry[0] & 0xFFFF
3629 flags = entry[0] & 0xFFFF
3618 linkrev = entry[4]
3630 linkrev = entry[4]
3619 p1 = index[entry[5]][7]
3631 p1 = index[entry[5]][7]
3620 p2 = index[entry[6]][7]
3632 p2 = index[entry[6]][7]
3621 node = entry[7]
3633 node = entry[7]
3622
3634
3623 # (Possibly) reuse the delta from the revlog if allowed and
3635 # (Possibly) reuse the delta from the revlog if allowed and
3624 # the revlog chunk is a delta.
3636 # the revlog chunk is a delta.
3625 cachedelta = None
3637 cachedelta = None
3626 rawtext = None
3638 rawtext = None
3627 if deltareuse == self.DELTAREUSEFULLADD:
3639 if deltareuse == self.DELTAREUSEFULLADD:
3628 text = self._revisiondata(rev)
3640 text = self._revisiondata(rev)
3629 sidedata = self.sidedata(rev)
3641 sidedata = self.sidedata(rev)
3630
3642
3631 if sidedata_helpers is not None:
3643 if sidedata_helpers is not None:
3632 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3644 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3633 self, sidedata_helpers, sidedata, rev
3645 self, sidedata_helpers, sidedata, rev
3634 )
3646 )
3635 flags = flags | new_flags[0] & ~new_flags[1]
3647 flags = flags | new_flags[0] & ~new_flags[1]
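                    # Precedence note: ``&`` binds tighter than ``|``, so this
                    # computes flags | (new_flags[0] & ~new_flags[1]), i.e. it
                    # ORs in the helper's "add" bits after masking out its
                    # "remove" bits. With illustrative values:
                    # 0b0100 | (0b0011 & ~0b0010) == 0b0101.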

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog.delta_config.lazy_delta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))
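                        # Shape of the reused delta (illustrative values): a
                        # pair of (delta-parent rev, raw delta chunk), e.g.
                        # (42, b'<binary delta>'), which the destination can
                        # store as-is instead of recomputing the diff.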

                sidedata = None
                if not cachedelta:
                    try:
                        rawtext = self._revisiondata(rev)
                    except error.CensoredNodeError as censored:
                        assert flags & REVIDX_ISCENSORED
                        rawtext = censored.tombstone
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

            if addrevisioncb:
                addrevisioncb(self, rev, node)
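            # Callback sketch (hypothetical example, names invented): the
            # callback receives the source revlog, the rev just copied, and
            # its node, so a caller could track what was copied:
            #
            #     copied = set()
            #
            #     def progress_cb(src, rev, node):
            #         copied.add(node)
            #
            #     srcrevlog.clone(tr, destrevlog, addrevisioncb=progress_cb)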

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)
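    # Usage sketch (illustrative; ``rl`` is a revlog, ``tr`` an open
    # transaction, ``censored_node`` an existing node in this revlog):
    #
    #     rl.censorrevision(tr, censored_node, tombstone=b'scrubbed')
    #
    # The v1 or v2 rewrite strategy is picked from the on-disk format above.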

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta:   file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #   ext:    content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text" mentioned below
            # is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).
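            # Worked example (illustrative values): for a renamed file, the
            # stored rawtext could look like
            #
            #   b'\x01\ncopy: a.txt\ncopyrev: <40 hex digits>\n\x01\nbody'
            #
            # LM is then everything up to and including the second b'\x01\n',
            # so len(read()) == L2 - LM, while rawsize() still reports the
            # full L1 == L2.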

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d
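    # Usage sketch (illustrative; ``rl`` is any revlog instance): callers opt
    # into each statistic they need, e.g.
    #
    #     info = rl.storageinfo(revisionscount=True, storedsize=True)
    #     info[b'revisionscount']  # number of revisions in this revlog
    #     info[b'storedsize']      # on-disk bytes across all revlog files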

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.feature_config.has_side_data:
            return
        # revlog formats with sidedata support do not support inline storage
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._inner._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.feature_config.has_side_data:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0:1] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0:1]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
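                # Mode summary (informal reading of the constants): PLAIN
                # stores the serialized sidedata uncompressed, DEFAULT stores
                # it compressed with the docket's default engine so the
                # per-chunk header can be dropped, and INLINE keeps the
                # compression header inside the chunk itself.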
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)