revlog: also migrates `revlog.upperboundcomp` to ConfigClass...
marmoute, r51976:5645524c, default branch
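In short: the `upperboundcomp` value passed to the revlog constructor is no
longer stored as a plain instance attribute; it now lives on `delta_config`,
and a deprecated `upperboundcomp` property is left behind for compatibility.
A minimal sketch of the resulting access pattern (illustrative only; the
arguments shown are hypothetical, not taken from the commit):

    # hypothetical usage sketch, not part of the commit itself
    rl = revlog(opener, (KIND_FILELOG, path), radix, upperboundcomp=10)
    rl.delta_config.upper_bound_comp  # new canonical location (== 10)
    rl.upperboundcomp                 # still works, but emits a deprecation
                                      # warning naming delta_config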
@@ -1,3797 +1,3807 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import io
import os
import struct
import weakref
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanket usage of all the names to prevent pyflakes complaints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider the "pure" python implementation to be "fast", because
# people using pure don't really have performance concerns (and a
# wheelbarrow of other sources of slowness)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if hasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'


class _Config:
    def copy(self):
        return self.__class__(**self.__dict__)


@attr.s()
class FeatureConfig(_Config):
    """Hold configuration values about the available revlog features"""

    # the default compression engine
    compression_engine = attr.ib(default=b'zlib')
    # compression engines options
    compression_engine_options = attr.ib(default=attr.Factory(dict))

    # can we use censor on this revlog
    censorable = attr.ib(default=False)
    # does this revlog use the "side data" feature
    has_side_data = attr.ib(default=False)
    # might remove rank configuration once the computation has no impact
    compute_rank = attr.ib(default=False)
    # parent order is supposed to be semantically irrelevant, so we
    # normally resort parents to ensure that the first parent is non-null,
    # if there is a non-null parent at all.
    # filelog abuses the parent order as a flag to mark some instances of
    # meta-encoded files, so allow it to disable this behavior.
    canonical_parent_order = attr.ib(default=False)
    # can ellipsis commit be used
    enable_ellipsis = attr.ib(default=False)

    def copy(self):
        new = super().copy()
        new.compression_engine_options = self.compression_engine_options.copy()
        return new


@attr.s()
class DataConfig(_Config):
    """Hold configuration values about how the revlog data are read"""

    # should we try to open the "pending" version of the revlog
    try_pending = attr.ib(default=False)
    # should we try to open the "split" version of the revlog
    try_split = attr.ib(default=False)
    # When True, indexfile should be opened with checkambig=True at writing,
    # to avoid file stat ambiguity.
    check_ambig = attr.ib(default=False)

    # If true, use mmap instead of reading to deal with large indexes
    mmap_large_index = attr.ib(default=False)
    # how much data counts as large
    mmap_index_threshold = attr.ib(default=None)
    # How much data to read and cache into the raw revlog data cache.
    chunk_cache_size = attr.ib(default=65536)

    # Allow sparse reading of the revlog data
    with_sparse_read = attr.ib(default=False)
    # minimal density of a sparse read chunk
    sr_density_threshold = attr.ib(default=0.50)
    # minimal size of the data we skip when performing sparse reads
    sr_min_gap_size = attr.ib(default=262144)

    # are deltas encoded against arbitrary bases
    generaldelta = attr.ib(default=False)


@attr.s()
class DeltaConfig(_Config):
    """Hold configuration values about how new deltas are computed

    Some attributes are duplicated from DataConfig to help keep each object
    self-contained.
    """

    # can deltas be encoded against arbitrary bases
    general_delta = attr.ib(default=False)
    # Allow sparse writing of the revlog data
    sparse_revlog = attr.ib(default=False)
    # maximum length of a delta chain
    max_chain_len = attr.ib(default=None)
    # Maximum distance between a delta chain's base start and end
    max_deltachain_span = attr.ib(default=-1)
    # If `upper_bound_comp` is not None, this is the expected maximal gain from
    # compression for the data content.
    upper_bound_comp = attr.ib(default=None)
    # Should we try a delta against both parents?
    delta_both_parents = attr.ib(default=True)
    # Test delta base candidate groups in chunks of this maximal size.
    candidate_group_chunk_size = attr.ib(default=0)
    # Should we display debug information about delta computation?
    debug_delta = attr.ib(default=False)
    # trust incoming deltas by default
    lazy_delta = attr.ib(default=True)
    # trust the base of incoming deltas by default
    lazy_delta_base = attr.ib(default=False)


class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

    _flagserrorclass = error.RevlogError

    @staticmethod
    def is_inline_index(header_bytes):
        """Determine if a revlog is inline from the initial bytes of the index"""
        header = INDEX_HEADER.unpack(header_bytes)[0]

        _format_flags = header & ~0xFFFF
        _format_version = header & 0xFFFF

        features = FEATURES_BY_VERSION[_format_version]
        return features[b'inline'](_format_flags)
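    # Editor's gloss on the masking above: the first index word packs the
    # format version into its low 16 bits and feature flags into the high
    # bits, e.g. a v1 inline revlog starts with REVLOGV1 | FLAG_INLINE_DATA.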

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        try_split=False,
        canonical_parent_order=True,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code understand what the revlog
        is about without having to resort to heuristics and index filename
        analysis. Note that this must be reliably set by normal code, but
        that test, debug, or performance-measurement code might not set it to
        an accurate value.
        """
-        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self._try_split = try_split
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        if b'feature-config' in self.opener.options:
            self.feature_config = self.opener.options[b'feature-config'].copy()
        else:
            self.feature_config = FeatureConfig()
        self.feature_config.censorable = censorable
        self.feature_config.canonical_parent_order = canonical_parent_order
        if b'data-config' in self.opener.options:
            self.data_config = self.opener.options[b'data-config'].copy()
        else:
            self.data_config = DataConfig()
        self.data_config.check_ambig = checkambig
        self.data_config.mmap_large_index = mmaplargeindex
        if b'delta-config' in self.opener.options:
            self.delta_config = self.opener.options[b'delta-config'].copy()
        else:
            self.delta_config = DeltaConfig()
+        self.delta_config.upper_bound_comp = upperboundcomp

        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')

        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}

        # other optional features

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    @property
    def _generaldelta(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
        )
        return self.delta_config.general_delta

    @property
    def _checkambig(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
        )
        return self.data_config.check_ambig

    @property
    def _mmaplargeindex(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
        )
        return self.data_config.mmap_large_index

    @property
    def _censorable(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
        )
        return self.feature_config.censorable

    @property
    def _chunkcachesize(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
        )
        return self.data_config.chunk_cache_size

    @property
    def _maxchainlen(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
        )
        return self.delta_config.max_chain_len

    @property
    def _deltabothparents(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
        )
        return self.delta_config.delta_both_parents

    @property
    def _candidate_group_chunk_size(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.candidate_group_chunk_size",
            b"6.6",
            stacklevel=2,
        )
        return self.delta_config.candidate_group_chunk_size

    @property
    def _debug_delta(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
        )
        return self.delta_config.debug_delta

    @property
    def _compengine(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.compression_engine",
            b"6.6",
            stacklevel=2,
        )
        return self.feature_config.compression_engine

    @property
+    def upperboundcomp(self):
+        """temporary compatibility proxy"""
+        util.nouideprecwarn(
+            b"use revlog.delta_config.upper_bound_comp",
+            b"6.6",
+            stacklevel=2,
+        )
+        return self.delta_config.upper_bound_comp
+
+    @property
    def _compengineopts(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.compression_engine_options",
            b"6.6",
            stacklevel=2,
        )
        return self.feature_config.compression_engine_options

    @property
    def _maxdeltachainspan(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
        )
        return self.delta_config.max_deltachain_span

    @property
    def _withsparseread(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
        )
        return self.data_config.with_sparse_read

    @property
    def _sparserevlog(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
        )
        return self.delta_config.sparse_revlog

    @property
    def hassidedata(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
        )
        return self.feature_config.has_side_data

    @property
    def _srdensitythreshold(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.sr_density_threshold",
            b"6.6",
            stacklevel=2,
        )
        return self.data_config.sr_density_threshold

    @property
    def _srmingapsize(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
        )
        return self.data_config.sr_min_gap_size

    @property
    def _compute_rank(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
        )
        return self.feature_config.compute_rank

    @property
    def canonical_parent_order(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.canonical_parent_order",
            b"6.6",
            stacklevel=2,
        )
        return self.feature_config.canonical_parent_order

    @property
    def _lazydelta(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
        )
        return self.delta_config.lazy_delta

    @property
    def _lazydeltabase(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
        )
        return self.delta_config.lazy_delta_base

    def _init_opts(self):
        """process options (from above/config) to set up the associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
            compute_rank = opts.get(b'changelogv2.compute-rank', True)
            self.feature_config.compute_rank = compute_rank
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        mmapindexthreshold = None
        if self.data_config.mmap_large_index:
            mmapindexthreshold = self.data_config.mmap_index_threshold
        if self.feature_config.enable_ellipsis:
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        chunk_cache_size = self.data_config.chunk_cache_size
        if chunk_cache_size <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % chunk_cache_size
            )
        elif chunk_cache_size & (chunk_cache_size - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % chunk_cache_size
            )
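        # Editor's gloss on the check above: a power of two has exactly one
        # bit set, so `n & (n - 1)` is zero only for valid sizes, e.g.
        # 65536 & 65535 == 0 (accepted) while 65537 & 65536 != 0 (rejected).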
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing, return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''
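    # Editor's gloss: with e.g. mmap_threshold=1048576, a 4 MiB index file is
    # memory-mapped while a 4 KiB one is read() into memory, and a missing
    # file yields b'' in both cases.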
751
761
752 def get_streams(self, max_linkrev, force_inline=False):
762 def get_streams(self, max_linkrev, force_inline=False):
753 """return a list of streams that represent this revlog
763 """return a list of streams that represent this revlog
754
764
755 This is used by stream-clone to do bytes to bytes copies of a repository.
765 This is used by stream-clone to do bytes to bytes copies of a repository.
756
766
757 This streams data for all revisions that refer to a changelog revision up
767 This streams data for all revisions that refer to a changelog revision up
758 to `max_linkrev`.
768 to `max_linkrev`.
759
769
760 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
770 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
761
771
762 It returns is a list of three-tuple:
772 It returns is a list of three-tuple:
763
773
764 [
774 [
765 (filename, bytes_stream, stream_size),
775 (filename, bytes_stream, stream_size),
766 …
776 …
767 ]
777 ]
768 """
778 """
769 n = len(self)
779 n = len(self)
770 index = self.index
780 index = self.index
771 while n > 0:
781 while n > 0:
772 linkrev = index[n - 1][4]
782 linkrev = index[n - 1][4]
773 if linkrev < max_linkrev:
783 if linkrev < max_linkrev:
774 break
784 break
775 # note: this loop will rarely go through multiple iterations, since
785 # note: this loop will rarely go through multiple iterations, since
776 # it only traverses commits created during the current streaming
786 # it only traverses commits created during the current streaming
777 # pull operation.
787 # pull operation.
778 #
788 #
779 # If this become a problem, using a binary search should cap the
789 # If this become a problem, using a binary search should cap the
780 # runtime of this.
790 # runtime of this.
781 n = n - 1
791 n = n - 1
782 if n == 0:
792 if n == 0:
783 # no data to send
793 # no data to send
784 return []
794 return []
785 index_size = n * index.entry_size
795 index_size = n * index.entry_size
786 data_size = self.end(n - 1)
796 data_size = self.end(n - 1)
787
797
788 # XXX we might have been split (or stripped) since the object
798 # XXX we might have been split (or stripped) since the object
789 # initialization, We need to close this race too, but having a way to
799 # initialization, We need to close this race too, but having a way to
790 # pre-open the file we feed to the revlog and never closing them before
800 # pre-open the file we feed to the revlog and never closing them before
791 # we are done streaming.
801 # we are done streaming.
792
802
793 if self._inline:
803 if self._inline:
794
804
795 def get_stream():
805 def get_stream():
796 with self.opener(self._indexfile, mode=b"r") as fp:
806 with self.opener(self._indexfile, mode=b"r") as fp:
797 yield None
807 yield None
798 size = index_size + data_size
808 size = index_size + data_size
799 if size <= 65536:
809 if size <= 65536:
800 yield fp.read(size)
810 yield fp.read(size)
801 else:
811 else:
802 yield from util.filechunkiter(fp, limit=size)
812 yield from util.filechunkiter(fp, limit=size)
803
813
804 inline_stream = get_stream()
814 inline_stream = get_stream()
805 next(inline_stream)
815 next(inline_stream)
806 return [
816 return [
807 (self._indexfile, inline_stream, index_size + data_size),
817 (self._indexfile, inline_stream, index_size + data_size),
808 ]
818 ]
809 elif force_inline:
819 elif force_inline:
810
820
811 def get_stream():
821 def get_stream():
812 with self.reading():
822 with self.reading():
813 yield None
823 yield None
814
824
815 for rev in range(n):
825 for rev in range(n):
816 idx = self.index.entry_binary(rev)
826 idx = self.index.entry_binary(rev)
817 if rev == 0 and self._docket is None:
827 if rev == 0 and self._docket is None:
818 # re-inject the inline flag
828 # re-inject the inline flag
819 header = self._format_flags
829 header = self._format_flags
820 header |= self._format_version
830 header |= self._format_version
821 header |= FLAG_INLINE_DATA
831 header |= FLAG_INLINE_DATA
822 header = self.index.pack_header(header)
832 header = self.index.pack_header(header)
823 idx = header + idx
833 idx = header + idx
824 yield idx
834 yield idx
825 yield self._getsegmentforrevs(rev, rev)[1]
835 yield self._getsegmentforrevs(rev, rev)[1]
826
836
827 inline_stream = get_stream()
837 inline_stream = get_stream()
828 next(inline_stream)
838 next(inline_stream)
829 return [
839 return [
830 (self._indexfile, inline_stream, index_size + data_size),
840 (self._indexfile, inline_stream, index_size + data_size),
831 ]
841 ]
832 else:
842 else:
833
843
834 def get_index_stream():
844 def get_index_stream():
835 with self.opener(self._indexfile, mode=b"r") as fp:
845 with self.opener(self._indexfile, mode=b"r") as fp:
836 yield None
846 yield None
837 if index_size <= 65536:
847 if index_size <= 65536:
838 yield fp.read(index_size)
848 yield fp.read(index_size)
839 else:
849 else:
840 yield from util.filechunkiter(fp, limit=index_size)
850 yield from util.filechunkiter(fp, limit=index_size)
841
851
842 def get_data_stream():
852 def get_data_stream():
843 with self._datafp() as fp:
853 with self._datafp() as fp:
844 yield None
854 yield None
845 if data_size <= 65536:
855 if data_size <= 65536:
846 yield fp.read(data_size)
856 yield fp.read(data_size)
847 else:
857 else:
848 yield from util.filechunkiter(fp, limit=data_size)
858 yield from util.filechunkiter(fp, limit=data_size)
849
859
850 index_stream = get_index_stream()
860 index_stream = get_index_stream()
851 next(index_stream)
861 next(index_stream)
852 data_stream = get_data_stream()
862 data_stream = get_data_stream()
853 next(data_stream)
863 next(data_stream)
854 return [
864 return [
855 (self._datafile, data_stream, data_size),
865 (self._datafile, data_stream, data_size),
856 (self._indexfile, index_stream, index_size),
866 (self._indexfile, index_stream, index_size),
857 ]
867 ]
858
868
859 def _loadindex(self, docket=None):
869 def _loadindex(self, docket=None):
860
870
861 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
871 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
862
872
863 if self.postfix is not None:
873 if self.postfix is not None:
864 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
874 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
865 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
875 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
866 entry_point = b'%s.i.a' % self.radix
876 entry_point = b'%s.i.a' % self.radix
867 elif self._try_split and self.opener.exists(self._split_index_file):
877 elif self._try_split and self.opener.exists(self._split_index_file):
868 entry_point = self._split_index_file
878 entry_point = self._split_index_file
869 else:
879 else:
870 entry_point = b'%s.i' % self.radix
880 entry_point = b'%s.i' % self.radix
871
881
872 if docket is not None:
882 if docket is not None:
873 self._docket = docket
883 self._docket = docket
874 self._docket_file = entry_point
884 self._docket_file = entry_point
875 else:
885 else:
876 self._initempty = True
886 self._initempty = True
877 entry_data = self._get_data(entry_point, mmapindexthreshold)
887 entry_data = self._get_data(entry_point, mmapindexthreshold)
878 if len(entry_data) > 0:
888 if len(entry_data) > 0:
879 header = INDEX_HEADER.unpack(entry_data[:4])[0]
889 header = INDEX_HEADER.unpack(entry_data[:4])[0]
880 self._initempty = False
890 self._initempty = False
881 else:
891 else:
882 header = new_header
892 header = new_header
883
893
884 self._format_flags = header & ~0xFFFF
894 self._format_flags = header & ~0xFFFF
885 self._format_version = header & 0xFFFF
895 self._format_version = header & 0xFFFF
886
896
        supported_flags = SUPPORTED_FLAGS.get(self._format_version)
        if supported_flags is None:
            msg = _(b'unknown version (%d) in revlog %s')
            msg %= (self._format_version, self.display_id)
            raise error.RevlogError(msg)
        elif self._format_flags & ~supported_flags:
            msg = _(b'unknown flags (%#04x) in version %d revlog %s')
            display_flag = self._format_flags >> 16
            msg %= (display_flag, self._format_version, self.display_id)
            raise error.RevlogError(msg)

        features = FEATURES_BY_VERSION[self._format_version]
        self._inline = features[b'inline'](self._format_flags)
        self.delta_config.general_delta = features[b'generaldelta'](
            self._format_flags
        )
        self.feature_config.has_side_data = features[b'sidedata']

        if not features[b'docket']:
            self._indexfile = entry_point
            index_data = entry_data
        else:
            self._docket_file = entry_point
            if self._initempty:
                self._docket = docketutil.default_docket(self, header)
            else:
                self._docket = docketutil.parse_docket(
                    self, entry_data, use_pending=self._trypending
                )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta is implied by version 2 revlogs.
            self.delta_config.general_delta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self.delta_config.general_delta:
            self.delta_config.sparse_revlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and hasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self._indexfile if self._inline else self._datafile),
            self.data_config.chunk_cache_size,
            chunkcache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self._sidedatafile,
            self.data_config.chunk_cache_size,
        )
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    def get_revlog(self):
        """simple function to mirror the API of other not-really-revlog
        classes"""
        return self

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public-facing "ID" of the revlog that we use in messages"""
        if self.revlog_kind == KIND_FILELOG:
            # Reference the file without the "data/" prefix, so it is
            # familiar to the user.
            return self.target[1]
        else:
            return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(
                    self.feature_config.compression_engine_options
                )
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self.feature_config.compression_engine]
        return engine.revlogcompressor(
            self.feature_config.compression_engine_options
        )

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def __index_write_fp(self):
        # You should not use this directly; use `_writing` instead.
        try:
            f = self.opener(
                self._indexfile,
                mode=b"r+",
                checkambig=self.data_config.check_ambig,
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except FileNotFoundError:
            return self.opener(
                self._indexfile,
                mode=b"w+",
                checkambig=self.data_config.check_ambig,
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from an inline
        # revlog.
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self.data_config.check_ambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _sidedatareadfp(self):
        """file object suitable to read sidedata"""
        if self._writinghandles:
            yield self._writinghandles[2]
        else:
            with self.opener(self._sidedatafile) as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def _candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and
        # two clients could have the same revlog node with different flags
        # (i.e. different rawtext contents) and the delta could be
        # incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        """update the on-disk cache

        If a transaction is passed, the update may be delayed to
        transaction commit."""
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        """Clear in-memory caches"""
        self._revisioncache = None
        self._chainbasecache.clear()
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket,
        # so we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and hasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        """return the revision number associated with a <nodeid>"""
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset (instead of previous-offset +
        # previous-size).
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

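    # Illustrative sketch (editor's note, not in the original source):
    # since offset and flags share the first index field, ``start`` and
    # ``flags`` recover them with a shift and a mask respectively:
    #
    #   >>> packed = (1024 << 16) | REVIDX_ISCENSORED
    #   >>> packed >> 16                    # byte offset in the data file
    #   1024
    #   >>> packed & 0xFFFF == REVIDX_ISCENSORED
    #   True
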
    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.feature_config.has_side_data:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

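    # Illustrative note (editor's addition): for a revision carrying no
    # content-changing flags, ``size`` never materializes the full text.
    # Assuming a hypothetical revlog ``rl`` and an unflagged revision:
    #
    #   >>> rl.size(rev) == rl.rawsize(rev)
    #   True
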
    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

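    # Illustrative sketch (editor's note): the loop above follows the
    # delta-base field (entry[3]) until it reaches a revision that is its
    # own base. For a hypothetical revlog ``rl`` whose bases form the
    # chain 5 -> 3 -> 2 -> 2:
    #
    #   >>> rl.chainbase(5)
    #   2
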
    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        if self.feature_config.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if self.feature_config.canonical_parent_order and d[5] == self.nullid:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self.delta_config.general_delta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

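    # Illustrative sketch (editor's note): for the same hypothetical chain
    # 5 -> 3 -> 2 (its own base), ``_chaininfo(5)`` counts two deltas and
    # sums the compressed length of both deltas plus the base text, then
    # memoizes the pair in ``_chaininfocache``:
    #
    #   >>> rl.chainlen(5)
    #   2
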
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        generaldelta = self.delta_config.general_delta
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

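    # Illustrative sketch (editor's note): with the hypothetical chain
    # 5 -> 3 -> 2 (base), the pure-Python fallback returns the chain in
    # ascending order and reports whether ``stoprev`` cut the walk short:
    #
    #   >>> rl._deltachain(5)
    #   ([2, 3, 5], False)
    #   >>> rl._deltachain(5, stoprev=3)
    #   ([5], True)
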
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

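    # Illustrative sketch (editor's note): on a hypothetical linear
    # history 0 -> 1 -> 2 -> 3, calling findcommonmissing with
    # common=[node(1)] and heads=[node(3)] yields a lazy ``has`` covering
    # ``::1`` (plus nullrev) and the missing nodes of revisions 2 and 3,
    # sorted by revision number:
    #
    #   >>> has, missing = rl.findcommonmissing([rl.node(1)], [rl.node(3)])
    #   >>> missing == [rl.node(2), rl.node(3)]
    #   True
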
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied,
        uses nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

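    # Illustrative sketch (editor's note): ``nodesbetween`` is effectively
    # the revset ``roots::heads``. On the hypothetical linear history
    # 0 -> 1 -> 2 -> 3:
    #
    #   >>> nodes, outroots, outheads = rl.nodesbetween(
    #   ...     [rl.node(1)], [rl.node(3)]
    #   ... )
    #   >>> nodes == [rl.node(1), rl.node(2), rl.node(3)]
    #   True
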
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

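    # Illustrative sketch (editor's note): every revision starts out as a
    # candidate head and is cleared as soon as it shows up as a parent.
    # On a hypothetical graph 0 -> 1 -> 2 plus a branch 1 -> 3, revisions
    # 2 and 3 never appear as parents, so:
    #
    #   >>> rl._headrevs()
    #   [2, 3]
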
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

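    # Illustrative sketch (editor's note): children are found by scanning
    # every later revision for one listing ``node`` as a parent. On the
    # hypothetical graph 0 -> 1 -> 2 plus a branch 1 -> 3:
    #
    #   >>> rl.children(rl.node(1)) == [rl.node(2), rl.node(3)]
    #   True
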
    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

1816 def reachableroots(self, minroot, heads, roots, includepath=False):
1826 def reachableroots(self, minroot, heads, roots, includepath=False):
1817 """return (heads(::(<roots> and <roots>::<heads>)))
1827 """return (heads(::(<roots> and <roots>::<heads>)))
1818
1828
1819 If includepath is True, return (<roots>::<heads>)."""
1829 If includepath is True, return (<roots>::<heads>)."""
1820 try:
1830 try:
1821 return self.index.reachableroots2(
1831 return self.index.reachableroots2(
1822 minroot, heads, roots, includepath
1832 minroot, heads, roots, includepath
1823 )
1833 )
1824 except AttributeError:
1834 except AttributeError:
1825 return dagop._reachablerootspure(
1835 return dagop._reachablerootspure(
1826 self.parentrevs, minroot, roots, heads, includepath
1836 self.parentrevs, minroot, roots, heads, includepath
1827 )
1837 )
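
    # A reading aid, not executed code: in revset notation the docstring
    # above means
    #
    #     includepath=False  ->  heads(::(<roots> and <roots>::<heads>))
    #     includepath=True   ->  <roots>::<heads>
    #
    # isancestorrev() calls it with roots=[a] and heads=[b]: a non-empty
    # result means `a` is reachable walking backwards from `b`.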

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if not (id[-1] in hexdigits):
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))
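
    # Minimal usage sketch (hedged; `rl` is a hypothetical already-open
    # revlog), mirroring the docstring above:
    #
    #     rl.lookup(b'0')           # bytes form of a revision number
    #     rl.lookup(rl.node(0))     # full binary nodeid
    #     rl.lookup(b'1234abcd')    # unambiguous hex prefix
    #
    # Anything else raises error.LookupError (b'no match found').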

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _getsegmentforrevs(self, startrev, endrev):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length)
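
    # Hedged sketch of how callers consume the returned segment (`rl` is a
    # hypothetical revlog; the inline-index offset correction is omitted):
    #
    #     offset, data = rl._getsegmentforrevs(firstrev, lastrev)
    #     for rev in range(firstrev, lastrev + 1):
    #         begin = rl.start(rev) - offset
    #         chunk = data[begin:begin + rl.length(rev)]
    #
    # _chunks() below follows this pattern, using util.buffer to avoid copies.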

    def _chunk(self, rev):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision number.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self.data_config.with_sparse_read:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self.delta_config.general_delta:
            return base
        else:
            return rev - 1
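
    # Interpretation note (a sketch, not executed code): the fourth index
    # field is the delta base. With general delta the base is used directly;
    # legacy revlogs always delta against the previous revision:
    #
    #     index[rev][3] == rev  ->  full text, deltaparent() is nullrev
    #     general_delta         ->  deltaparent(rev) == index[rev][3]
    #     legacy layout         ->  deltaparent(rev) == rev - 1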

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self.delta_config.sparse_revlog:
            return self.deltaparent(rev) == nullrev
        elif hasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        while self.length(p1) == 0:
            b = self.deltaparent(p1)
            if b == p1:
                break
            p1 = b
        p2 = entry[6]
        while self.length(p2) == 0:
            b = self.deltaparent(p2)
            if b == p2:
                break
            p2 = b
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def revision(self, nodeorrev):
        """return an uncompressed revision of a given node or revision
        number.
        """
        return self._revisiondata(nodeorrev)

    def sidedata(self, nodeorrev):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev)

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

    def _rawtext(self, node, rev):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
            filename = self._sidedatafile
            end = self._docket.sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev):
        """return the uncompressed raw data of a given node or revision number."""
        return self._revisiondata(nodeorrev, raw=True)
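
    # Hedged contrast of the two read entry points above (`rl` is a
    # hypothetical revlog):
    #
    #     rl.rawdata(node)    # bytes as stored (plus decompression); the
    #                         # read-side flag processors are skipped
    #     rl.revision(node)   # bytes after read-side flag processing, e.g.
    #                         # for REVIDX_EXTSTORED style flags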

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self.feature_config.censorable and storageutil.iscensoredtext(
                text
            ):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    @property
    def _split_index_file(self):
        """the path where to expect the index of an ongoing splitting operation

        The file will only exist if a splitting operation is in progress, but
        it is always expected at the same location."""
        parts = self.radix.split(b'/')
        if len(parts) > 1:
            # adds a '-s' suffix to the ``data/`` or ``meta/`` base directory
            head = parts[0] + b'-s'
            mids = parts[1:-1]
            tail = parts[-1] + b'.i'
            pieces = [head] + mids + [tail]
            return b'/'.join(pieces)
        else:
            # the revlog is stored at the root of the store (changelog or
            # manifest), no risk of collision.
            return self.radix + b'.i.s'
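
    # Worked example for the property above (paths are illustrative):
    #
    #     radix b'data/foo'    ->  b'data-s/foo.i'
    #     radix b'00changelog' ->  b'00changelog.i.s'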

    def _enforceinlinesize(self, tr, side_write=True):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        if troffset:
            tr.addbackup(self._indexfile, for_offset=True)
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with sidedata writing handle as it is only
            # relevant with revlog-v2 which is never inline, not reaching
            # this code
        if side_write:
            old_index_file_path = self._indexfile
            new_index_file_path = self._split_index_file
            opener = self.opener
            weak_self = weakref.ref(self)

            # the "split" index replaces the real index when the transaction
            # is finalized
            def finalize_callback(tr):
                opener.rename(
                    new_index_file_path,
                    old_index_file_path,
                    checkambig=True,
                )
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path

            def abort_callback(tr):
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path

            tr.registertmp(new_index_file_path)
            if self.target[1] is not None:
                callback_id = b'000-revlog-split-%d-%s' % self.target
            else:
                callback_id = b'000-revlog-split-%d' % self.target[0]
            tr.addfinalize(callback_id, finalize_callback)
            tr.addabort(callback_id, abort_callback)

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self.reading():
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r)[1])
                new_dfh.flush()

            if side_write:
                self._indexfile = new_index_file_path
                with self.__index_new_fp() as fp:
                    self._format_flags &= ~FLAG_INLINE_DATA
                    self._inline = False
                    for i in self:
                        e = self.index.entry_binary(i)
                        if i == 0 and self._docket is None:
                            header = self._format_flags | self._format_version
                            header = self.index.pack_header(header)
                            e = header + e
                        fp.write(e)
                    if self._docket is not None:
                        self._docket.index_end = fp.tell()

                    # If we don't use side-write, the temp file replaces the
                    # real index when we exit the context manager

            nodemaputil.setup_persistent_nodemap(tr, self)
            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self._datafile,
                self.data_config.chunk_cache_size,
            )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with sidedata writing handle as it is only
                # relevant with revlog-v2 which is never inline, not reaching
                # this code
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        if len(self.index) == 0:
            yield  # nothing to be read
        else:
            with self._segmentfile.reading():
                with self._segmentfile_sidedata.reading():
                    yield
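
    # Typical usage sketch (hedged; `rl` is a hypothetical already-open
    # revlog): wrap batch reads so the data and sidedata files are opened
    # only once instead of per revision:
    #
    #     with rl.reading():
    #         texts = [rl.revision(r) for r in rl]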

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self._datafp(b"w+")
                    transaction.add(self._datafile, dsize)
                if self._sidedatafile is not None:
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
                        dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
                    transaction.add(
                        self._sidedatafile, self._docket.sidedata_end
                    )

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self._inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing references to
                # potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic.

        We could also imagine using the same transaction logic for all revlogs
        since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.feature_config.has_side_data:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )
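
    # Minimal write-path sketch (hedged; `rl`, `tr`, `linkrev`, and the
    # parent nodes are hypothetical, and a real caller also holds the
    # appropriate locks and owns the transaction):
    #
    #     rev = rl.addrevision(text, tr, linkrev, p1node, p2node)
    #
    # Note the early `return rev` above: adding an already-stored node is a
    # no-op that returns the existing revision number.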

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data
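
    # Round-trip sketch tying compress() and decompress() together (hedged;
    # `rl` is a hypothetical revlog, and decompress() may return a buffer
    # rather than bytes):
    #
    #     header, packed = rl.compress(b'some data')
    #     assert bytes(rl.decompress(header + packed)) == b'some data'
    #
    # header is b'' when the compressor wrote its own marker (e.g. b'x' for
    # zlib) or when the data already starts with b'\0'; b'u' flags data
    # stored uncompressed.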

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)
2776
2786
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self.delta_config.debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        if cachedelta is not None and len(cachedelta) == 2:
            # If the cached delta has no information about how it should be
            # reused, add the default reuse instruction according to the
            # revlog's configuration.
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse = DELTA_BASE_REUSE_NO
            cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.feature_config.has_side_data:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self.feature_config.compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

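        # Illustrative note on the merge branch above (hypothetical revs):
        # with pmin=2 and pmax=5, where rev 5 has rank 5 (itself plus four
        # ancestors) and rev 2 contributes two revisions unreachable from 5:
        #
        #     rank = 1 + self.fast_rank(5)                            # 1 + 5
        #     rank += sum(1 for _ in self.findmissingrevs([5], [2]))  # + 2
        #
        # The three terms partition the merge's ancestor set, so rank=8 is
        # the number of ancestors of the new revision, itself included.
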
        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.

        Versions < 2 of the revlog can get this in O(1), while revlog v2 needs
        a docket file to store that information: since sidedata can be
        rewritten to the end of the data file within a transaction, you can
        have cases where, for example, rev `n` does not have sidedata while
        rev `n - 1` does, leading to `n - 1`'s sidedata being written after
        `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._writinghandles[0]
            wh2 = self._writinghandles[1]
            wh3 = self._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        Add a delta group.

        Given a set of deltas, add them to the revision log. The
        first delta is against its parent, which should be in our
        log; the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self.delta_config.debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

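    # Illustrative sketch of the items addgroup() consumes; every name here
    # is hypothetical:
    #
    #     deltas = [
    #         # (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    #         (node, p1node, p2node, clnode, basenode, deltabytes, 0, {}),
    #     ]
    #     rl.addgroup(deltas, linkmapper=changelog.rev, transaction=tr)
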
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self.feature_config.censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self.feature_config.censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

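    # Illustrative sketch (hypothetical caller): interpreting the returned
    # pair before a strip.
    #
    #     rev, broken = rl.getstrippoint(minlink)
    #     if rev == len(rl):
    #         pass  # nothing to strip
    #     else:
    #         # `broken` lists revs whose linkrevs the strip invalidates
    #         rl.strip(minlink, tr)
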
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment.
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

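    # Illustrative sketch of how a verifier might read checksize()'s result
    # (names hypothetical):
    #
    #     dd, di = rl.checksize()
    #     if dd:
    #         report(b'data file has %d bytes of trailing garbage' % dd)
    #     if di:
    #         report(b'index file has %d bytes of trailing garbage' % di)
    #     # (0, 0): both files end exactly where the index expects.
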
    def files(self):
        """return list of files that compose this revlog"""
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self.delta_config.general_delta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self._candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        the two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means if
        you are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. If unset, the destination revlog's current configuration
        is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        old_delta_config = destrevlog.delta_config
        destrevlog.delta_config = destrevlog.delta_config.copy()

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog.delta_config.lazy_delta_base = True
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = False

            delta_both_parents = (
                forcedeltabothparents or old_delta_config.delta_both_parents
            )
            destrevlog.delta_config.delta_both_parents = delta_both_parents

            with self.reading(), destrevlog._writing(tr):
                self._clone(
                    tr,
                    destrevlog,
                    addrevisioncb,
                    deltareuse,
                    forcedeltabothparents,
                    sidedata_helpers,
                )

        finally:
            destrevlog.delta_config = old_delta_config

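    # Illustrative sketch (hypothetical setup): cloning into a fresh revlog
    # with an explicit delta reuse policy, e.g. during a format upgrade.
    #
    #     with repo.transaction(b'upgrade') as tr:
    #         src.clone(
    #             tr,
    #             dst,
    #             deltareuse=src.DELTAREUSESAMEREVS,  # reuse only when apt
    #         )
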
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self.delta_config.debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog.delta_config.lazy_delta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    try:
                        rawtext = self._revisiondata(rev)
                    except error.CensoredNodeError as censored:
                        assert flags & REVIDX_ISCENSORED
                        rawtext = censored.tombstone
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

3579 def verifyintegrity(self, state):
3589 def verifyintegrity(self, state):
3580 """Verifies the integrity of the revlog.
3590 """Verifies the integrity of the revlog.
3581
3591
3582 Yields ``revlogproblem`` instances describing problems that are
3592 Yields ``revlogproblem`` instances describing problems that are
3583 found.
3593 found.
3584 """
3594 """
3585 dd, di = self.checksize()
3595 dd, di = self.checksize()
3586 if dd:
3596 if dd:
3587 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3597 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3588 if di:
3598 if di:
3589 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3599 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3590
3600
3591 version = self._format_version
3601 version = self._format_version
3592
3602
3593 # The verifier tells us what version revlog we should be.
3603 # The verifier tells us what version revlog we should be.
3594 if version != state[b'expectedversion']:
3604 if version != state[b'expectedversion']:
3595 yield revlogproblem(
3605 yield revlogproblem(
3596 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3606 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3597 % (self.display_id, version, state[b'expectedversion'])
3607 % (self.display_id, version, state[b'expectedversion'])
3598 )
3608 )
3599
3609
3600 state[b'skipread'] = set()
3610 state[b'skipread'] = set()
3601 state[b'safe_renamed'] = set()
3611 state[b'safe_renamed'] = set()
3602
3612
3603 for rev in self:
3613 for rev in self:
3604 node = self.node(rev)
3614 node = self.node(rev)
3605
3615
3606 # Verify contents. 4 cases to care about:
3616 # Verify contents. 4 cases to care about:
3607 #
3617 #
3608 # common: the most common case
3618 # common: the most common case
3609 # rename: with a rename
3619 # rename: with a rename
3610 # meta: file content starts with b'\1\n', the metadata
3620 # meta: file content starts with b'\1\n', the metadata
3611 # header defined in filelog.py, but without a rename
3621 # header defined in filelog.py, but without a rename
3612 # ext: content stored externally
3622 # ext: content stored externally
3613 #
3623 #
3614 # More formally, their differences are shown below:
3624 # More formally, their differences are shown below:
3615 #
3625 #
3616 # | common | rename | meta | ext
3626 # | common | rename | meta | ext
3617 # -------------------------------------------------------
3627 # -------------------------------------------------------
3618 # flags() | 0 | 0 | 0 | not 0
3628 # flags() | 0 | 0 | 0 | not 0
3619 # renamed() | False | True | False | ?
3629 # renamed() | False | True | False | ?
3620 # rawtext[0:2]=='\1\n'| False | True | True | ?
3630 # rawtext[0:2]=='\1\n'| False | True | True | ?
3621 #
3631 #
3622 # "rawtext" means the raw text stored in revlog data, which
3632 # "rawtext" means the raw text stored in revlog data, which
3623 # could be retrieved by "rawdata(rev)". "text"
3633 # could be retrieved by "rawdata(rev)". "text"
3624 # mentioned below is "revision(rev)".
3634 # mentioned below is "revision(rev)".
3625 #
3635 #
3626 # There are 3 different lengths stored physically:
3636 # There are 3 different lengths stored physically:
3627 # 1. L1: rawsize, stored in revlog index
3637 # 1. L1: rawsize, stored in revlog index
3628 # 2. L2: len(rawtext), stored in revlog data
3638 # 2. L2: len(rawtext), stored in revlog data
3629 # 3. L3: len(text), stored in revlog data if flags==0, or
3639 # 3. L3: len(text), stored in revlog data if flags==0, or
3630 # possibly somewhere else if flags!=0
3640 # possibly somewhere else if flags!=0
3631 #
3641 #
3632 # L1 should be equal to L2. L3 could be different from them.
3642 # L1 should be equal to L2. L3 could be different from them.
3633 # "text" may or may not affect commit hash depending on flag
3643 # "text" may or may not affect commit hash depending on flag
3634 # processors (see flagutil.addflagprocessor).
3644 # processors (see flagutil.addflagprocessor).
3635 #
3645 #
3636 # | common | rename | meta | ext
3646 # | common | rename | meta | ext
3637 # -------------------------------------------------
3647 # -------------------------------------------------
3638 # rawsize() | L1 | L1 | L1 | L1
3648 # rawsize() | L1 | L1 | L1 | L1
3639 # size() | L1 | L2-LM | L1(*) | L1 (?)
3649 # size() | L1 | L2-LM | L1(*) | L1 (?)
3640 # len(rawtext) | L2 | L2 | L2 | L2
3650 # len(rawtext) | L2 | L2 | L2 | L2
3641 # len(text) | L2 | L2 | L2 | L3
3651 # len(text) | L2 | L2 | L2 | L3
3642 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3652 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3643 #
3653 #
3644 # LM: length of metadata, depending on rawtext
3654 # LM: length of metadata, depending on rawtext
3645 # (*): not ideal, see comment in filelog.size
3655 # (*): not ideal, see comment in filelog.size
3646 # (?): could be "- len(meta)" if the resolved content has
3656 # (?): could be "- len(meta)" if the resolved content has
3647 # rename metadata
3657 # rename metadata
3648 #
3658 #
3649 # Checks that need to be done:
3659 # Checks that need to be done:
3650 # 1. length check: L1 == L2, in all cases.
3660 # 1. length check: L1 == L2, in all cases.
3651 # 2. hash check: depending on flag processor, we may need to
3661 # 2. hash check: depending on flag processor, we may need to
3652 # use either "text" (external), or "rawtext" (in revlog).
3662 # use either "text" (external), or "rawtext" (in revlog).
3653
3663
3654 try:
3664 try:
3655 skipflags = state.get(b'skipflags', 0)
3665 skipflags = state.get(b'skipflags', 0)
3656 if skipflags:
3666 if skipflags:
3657 skipflags &= self.flags(rev)
3667 skipflags &= self.flags(rev)
3658
3668
3659 _verify_revision(self, skipflags, state, node)
3669 _verify_revision(self, skipflags, state, node)
3660
3670
3661 l1 = self.rawsize(rev)
3671 l1 = self.rawsize(rev)
3662 l2 = len(self.rawdata(node))
3672 l2 = len(self.rawdata(node))
3663
3673
3664 if l1 != l2:
3674 if l1 != l2:
3665 yield revlogproblem(
3675 yield revlogproblem(
3666 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3676 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3667 node=node,
3677 node=node,
3668 )
3678 )
3669
3679
3670 except error.CensoredNodeError:
3680 except error.CensoredNodeError:
3671 if state[b'erroroncensored']:
3681 if state[b'erroroncensored']:
3672 yield revlogproblem(
3682 yield revlogproblem(
3673 error=_(b'censored file data'), node=node
3683 error=_(b'censored file data'), node=node
3674 )
3684 )
3675 state[b'skipread'].add(node)
3685 state[b'skipread'].add(node)
3676 except Exception as e:
3686 except Exception as e:
3677 yield revlogproblem(
3687 yield revlogproblem(
3678 error=_(b'unpacking %s: %s')
3688 error=_(b'unpacking %s: %s')
3679 % (short(node), stringutil.forcebytestr(e)),
3689 % (short(node), stringutil.forcebytestr(e)),
3680 node=node,
3690 node=node,
3681 )
3691 )
3682 state[b'skipread'].add(node)
3692 state[b'skipread'].add(node)
3683
3693
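As an aside, `verifyintegrity` is a generator, so problems are produced lazily. A minimal consumer sketch, assuming the `revlogproblem` attributes (`error`, `warning`, `node`) default to None as the constructions above suggest; `rl` and `state` are hypothetical stand-ins:

    def report_problems(rl, state):
        # iterate lazily; each yielded object describes one problem
        for problem in rl.verifyintegrity(state):
            if problem.error is not None:
                print(b'error: %s' % problem.error)
            elif problem.warning is not None:
                print(b'warning: %s' % problem.warning)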
3684 def storageinfo(
3694 def storageinfo(
3685 self,
3695 self,
3686 exclusivefiles=False,
3696 exclusivefiles=False,
3687 sharedfiles=False,
3697 sharedfiles=False,
3688 revisionscount=False,
3698 revisionscount=False,
3689 trackedsize=False,
3699 trackedsize=False,
3690 storedsize=False,
3700 storedsize=False,
3691 ):
3701 ):
3692 d = {}
3702 d = {}
3693
3703
3694 if exclusivefiles:
3704 if exclusivefiles:
3695 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3705 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3696 if not self._inline:
3706 if not self._inline:
3697 d[b'exclusivefiles'].append((self.opener, self._datafile))
3707 d[b'exclusivefiles'].append((self.opener, self._datafile))
3698
3708
3699 if sharedfiles:
3709 if sharedfiles:
3700 d[b'sharedfiles'] = []
3710 d[b'sharedfiles'] = []
3701
3711
3702 if revisionscount:
3712 if revisionscount:
3703 d[b'revisionscount'] = len(self)
3713 d[b'revisionscount'] = len(self)
3704
3714
3705 if trackedsize:
3715 if trackedsize:
3706 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3716 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3707
3717
3708 if storedsize:
3718 if storedsize:
3709 d[b'storedsize'] = sum(
3719 d[b'storedsize'] = sum(
3710 self.opener.stat(path).st_size for path in self.files()
3720 self.opener.stat(path).st_size for path in self.files()
3711 )
3721 )
3712
3722
3713 return d
3723 return d
3714
3724
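For illustration, a hedged usage sketch of `storageinfo`; `rl` stands in for a revlog instance. Flags left at False are skipped entirely, so the returned dict only contains the requested keys:

    info = rl.storageinfo(revisionscount=True, trackedsize=True)
    # keys are bytes, matching the construction above
    revs = info[b'revisionscount']
    size = info[b'trackedsize']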
3715 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3725 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3716 if not self.feature_config.has_side_data:
3726 if not self.feature_config.has_side_data:
3717 return
3727 return
3718 # revlog formats with sidedata support do not support inline
3728 # revlog formats with sidedata support do not support inline
3719 assert not self._inline
3729 assert not self._inline
3720 if not helpers[1] and not helpers[2]:
3730 if not helpers[1] and not helpers[2]:
3721 # Nothing to generate or remove
3731 # Nothing to generate or remove
3722 return
3732 return
3723
3733
3724 new_entries = []
3734 new_entries = []
3725 # append the new sidedata
3735 # append the new sidedata
3726 with self._writing(transaction):
3736 with self._writing(transaction):
3727 ifh, dfh, sdfh = self._writinghandles
3737 ifh, dfh, sdfh = self._writinghandles
3728 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3738 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3729
3739
3730 current_offset = sdfh.tell()
3740 current_offset = sdfh.tell()
3731 for rev in range(startrev, endrev + 1):
3741 for rev in range(startrev, endrev + 1):
3732 entry = self.index[rev]
3742 entry = self.index[rev]
3733 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3743 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3734 store=self,
3744 store=self,
3735 sidedata_helpers=helpers,
3745 sidedata_helpers=helpers,
3736 sidedata={},
3746 sidedata={},
3737 rev=rev,
3747 rev=rev,
3738 )
3748 )
3739
3749
3740 serialized_sidedata = sidedatautil.serialize_sidedata(
3750 serialized_sidedata = sidedatautil.serialize_sidedata(
3741 new_sidedata
3751 new_sidedata
3742 )
3752 )
3743
3753
3744 sidedata_compression_mode = COMP_MODE_INLINE
3754 sidedata_compression_mode = COMP_MODE_INLINE
3745 if serialized_sidedata and self.feature_config.has_side_data:
3755 if serialized_sidedata and self.feature_config.has_side_data:
3746 sidedata_compression_mode = COMP_MODE_PLAIN
3756 sidedata_compression_mode = COMP_MODE_PLAIN
3747 h, comp_sidedata = self.compress(serialized_sidedata)
3757 h, comp_sidedata = self.compress(serialized_sidedata)
3748 if (
3758 if (
3749 h != b'u'
3759 h != b'u'
3750 and comp_sidedata[0] != b'\0'
3760 and comp_sidedata[0] != b'\0'
3751 and len(comp_sidedata) < len(serialized_sidedata)
3761 and len(comp_sidedata) < len(serialized_sidedata)
3752 ):
3762 ):
3753 assert not h
3763 assert not h
3754 if (
3764 if (
3755 comp_sidedata[0]
3765 comp_sidedata[0]
3756 == self._docket.default_compression_header
3766 == self._docket.default_compression_header
3757 ):
3767 ):
3758 sidedata_compression_mode = COMP_MODE_DEFAULT
3768 sidedata_compression_mode = COMP_MODE_DEFAULT
3759 serialized_sidedata = comp_sidedata
3769 serialized_sidedata = comp_sidedata
3760 else:
3770 else:
3761 sidedata_compression_mode = COMP_MODE_INLINE
3771 sidedata_compression_mode = COMP_MODE_INLINE
3762 serialized_sidedata = comp_sidedata
3772 serialized_sidedata = comp_sidedata
3763 if entry[8] != 0 or entry[9] != 0:
3773 if entry[8] != 0 or entry[9] != 0:
3764 # rewriting entries that already have sidedata is not
3774 # rewriting entries that already have sidedata is not
3765 # supported yet, because it introduces garbage data in the
3775 # supported yet, because it introduces garbage data in the
3766 # revlog.
3776 # revlog.
3767 msg = b"rewriting existing sidedata is not supported yet"
3777 msg = b"rewriting existing sidedata is not supported yet"
3768 raise error.Abort(msg)
3778 raise error.Abort(msg)
3769
3779
3770 # Apply (potential) flags to add and to remove after running
3780 # Apply (potential) flags to add and to remove after running
3771 # the sidedata helpers
3781 # the sidedata helpers
3772 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3782 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3773 entry_update = (
3783 entry_update = (
3774 current_offset,
3784 current_offset,
3775 len(serialized_sidedata),
3785 len(serialized_sidedata),
3776 new_offset_flags,
3786 new_offset_flags,
3777 sidedata_compression_mode,
3787 sidedata_compression_mode,
3778 )
3788 )
3779
3789
3780 # the sidedata computation might have moved the file cursors around
3790 # the sidedata computation might have moved the file cursors around
3781 sdfh.seek(current_offset, os.SEEK_SET)
3791 sdfh.seek(current_offset, os.SEEK_SET)
3782 sdfh.write(serialized_sidedata)
3792 sdfh.write(serialized_sidedata)
3783 new_entries.append(entry_update)
3793 new_entries.append(entry_update)
3784 current_offset += len(serialized_sidedata)
3794 current_offset += len(serialized_sidedata)
3785 self._docket.sidedata_end = sdfh.tell()
3795 self._docket.sidedata_end = sdfh.tell()
3786
3796
3787 # rewrite the new index entries
3797 # rewrite the new index entries
3788 ifh.seek(startrev * self.index.entry_size)
3798 ifh.seek(startrev * self.index.entry_size)
3789 for i, e in enumerate(new_entries):
3799 for i, e in enumerate(new_entries):
3790 rev = startrev + i
3800 rev = startrev + i
3791 self.index.replace_sidedata_info(rev, *e)
3801 self.index.replace_sidedata_info(rev, *e)
3792 packed = self.index.entry_binary(rev)
3802 packed = self.index.entry_binary(rev)
3793 if rev == 0 and self._docket is None:
3803 if rev == 0 and self._docket is None:
3794 header = self._format_flags | self._format_version
3804 header = self._format_flags | self._format_version
3795 header = self.index.pack_header(header)
3805 header = self.index.pack_header(header)
3796 packed = header + packed
3806 packed = header + packed
3797 ifh.write(packed)
3807 ifh.write(packed)
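One subtlety in the flag bookkeeping above is operator precedence: in `entry[0] | flags[0] & ~flags[1]`, the `&` binds tighter than `|`, so the removal mask only filters the freshly added flags. A small self-contained illustration with made-up flag bits (FLAG_A/FLAG_B are hypothetical, for demonstration only):

    FLAG_A = 1 << 0  # hypothetical flag bits
    FLAG_B = 1 << 1

    entry_flags = FLAG_B
    to_add, to_remove = FLAG_A, FLAG_B
    # parsed as entry_flags | (to_add & ~to_remove)
    new_offset_flags = entry_flags | to_add & ~to_remove
    assert new_offset_flags == FLAG_A | FLAG_B  # FLAG_B is not cleared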
@@ -1,1630 +1,1632 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17
17
18 from .constants import (
18 from .constants import (
19 COMP_MODE_DEFAULT,
19 COMP_MODE_DEFAULT,
20 COMP_MODE_INLINE,
20 COMP_MODE_INLINE,
21 COMP_MODE_PLAIN,
21 COMP_MODE_PLAIN,
22 DELTA_BASE_REUSE_FORCE,
22 DELTA_BASE_REUSE_FORCE,
23 DELTA_BASE_REUSE_NO,
23 DELTA_BASE_REUSE_NO,
24 KIND_CHANGELOG,
24 KIND_CHANGELOG,
25 KIND_FILELOG,
25 KIND_FILELOG,
26 KIND_MANIFESTLOG,
26 KIND_MANIFESTLOG,
27 REVIDX_ISCENSORED,
27 REVIDX_ISCENSORED,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 )
29 )
30
30
31 from ..thirdparty import attr
31 from ..thirdparty import attr
32
32
33 from .. import (
33 from .. import (
34 error,
34 error,
35 mdiff,
35 mdiff,
36 util,
36 util,
37 )
37 )
38
38
39 from . import flagutil
39 from . import flagutil
40
40
41 # maximum <delta-chain-data>/<revision-text-length> ratio
41 # maximum <delta-chain-data>/<revision-text-length> ratio
42 LIMIT_DELTA2TEXT = 2
42 LIMIT_DELTA2TEXT = 2
43
43
44
44
45 class _testrevlog:
45 class _testrevlog:
46 """minimalist fake revlog to use in doctests"""
46 """minimalist fake revlog to use in doctests"""
47
47
48 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
48 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
49 """data is an list of revision payload boundaries"""
49 """data is an list of revision payload boundaries"""
50 from .. import revlog
50 from .. import revlog
51
51
52 self._data = data
52 self._data = data
53 self.data_config = revlog.DataConfig()
53 self.data_config = revlog.DataConfig()
54 self.data_config.sr_density_threshold = density
54 self.data_config.sr_density_threshold = density
55 self.data_config.sr_min_gap_size = mingap
55 self.data_config.sr_min_gap_size = mingap
56 self.delta_config = revlog.DeltaConfig()
56 self.delta_config = revlog.DeltaConfig()
57 self.feature_config = revlog.FeatureConfig()
57 self.feature_config = revlog.FeatureConfig()
58 self._snapshot = set(snapshot)
58 self._snapshot = set(snapshot)
59 self.index = None
59 self.index = None
60
60
61 def start(self, rev):
61 def start(self, rev):
62 if rev == nullrev:
62 if rev == nullrev:
63 return 0
63 return 0
64 if rev == 0:
64 if rev == 0:
65 return 0
65 return 0
66 return self._data[rev - 1]
66 return self._data[rev - 1]
67
67
68 def end(self, rev):
68 def end(self, rev):
69 if rev == nullrev:
69 if rev == nullrev:
70 return 0
70 return 0
71 return self._data[rev]
71 return self._data[rev]
72
72
73 def length(self, rev):
73 def length(self, rev):
74 return self.end(rev) - self.start(rev)
74 return self.end(rev) - self.start(rev)
75
75
76 def __len__(self):
76 def __len__(self):
77 return len(self._data)
77 return len(self._data)
78
78
79 def issnapshot(self, rev):
79 def issnapshot(self, rev):
80 if rev == nullrev:
80 if rev == nullrev:
81 return True
81 return True
82 return rev in self._snapshot
82 return rev in self._snapshot
83
83
84
84
85 def slicechunk(revlog, revs, targetsize=None):
85 def slicechunk(revlog, revs, targetsize=None):
86 """slice revs to reduce the amount of unrelated data to be read from disk.
86 """slice revs to reduce the amount of unrelated data to be read from disk.
87
87
88 ``revs`` is sliced into groups that should be read in one go.
88 ``revs`` is sliced into groups that should be read in one go.
89 Assume that revs are sorted.
89 Assume that revs are sorted.
90
90
91 The initial chunk is sliced until the overall density (payload/chunks-span
91 The initial chunk is sliced until the overall density (payload/chunks-span
92 ratio) is above `revlog.data_config.sr_density_threshold`. No gap smaller
92 ratio) is above `revlog.data_config.sr_density_threshold`. No gap smaller
93 than `revlog.data_config.sr_min_gap_size` is skipped.
93 than `revlog.data_config.sr_min_gap_size` is skipped.
94
94
95 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
95 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
96 For consistency with other slicing choices, this limit won't go lower than
96 For consistency with other slicing choices, this limit won't go lower than
97 `revlog.data_config.sr_min_gap_size`.
97 `revlog.data_config.sr_min_gap_size`.
98
98
99 If an individual revision chunk is larger than this limit, it will still
99 If an individual revision chunk is larger than this limit, it will still
100 be yielded individually.
100 be yielded individually.
101
101
102 >>> data = [
102 >>> data = [
103 ... 5, #00 (5)
103 ... 5, #00 (5)
104 ... 10, #01 (5)
104 ... 10, #01 (5)
105 ... 12, #02 (2)
105 ... 12, #02 (2)
106 ... 12, #03 (empty)
106 ... 12, #03 (empty)
107 ... 27, #04 (15)
107 ... 27, #04 (15)
108 ... 31, #05 (4)
108 ... 31, #05 (4)
109 ... 31, #06 (empty)
109 ... 31, #06 (empty)
110 ... 42, #07 (11)
110 ... 42, #07 (11)
111 ... 47, #08 (5)
111 ... 47, #08 (5)
112 ... 47, #09 (empty)
112 ... 47, #09 (empty)
113 ... 48, #10 (1)
113 ... 48, #10 (1)
114 ... 51, #11 (3)
114 ... 51, #11 (3)
115 ... 74, #12 (23)
115 ... 74, #12 (23)
116 ... 85, #13 (11)
116 ... 85, #13 (11)
117 ... 86, #14 (1)
117 ... 86, #14 (1)
118 ... 91, #15 (5)
118 ... 91, #15 (5)
119 ... ]
119 ... ]
120 >>> revlog = _testrevlog(data, snapshot=range(16))
120 >>> revlog = _testrevlog(data, snapshot=range(16))
121
121
122 >>> list(slicechunk(revlog, list(range(16))))
122 >>> list(slicechunk(revlog, list(range(16))))
123 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
123 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
124 >>> list(slicechunk(revlog, [0, 15]))
124 >>> list(slicechunk(revlog, [0, 15]))
125 [[0], [15]]
125 [[0], [15]]
126 >>> list(slicechunk(revlog, [0, 11, 15]))
126 >>> list(slicechunk(revlog, [0, 11, 15]))
127 [[0], [11], [15]]
127 [[0], [11], [15]]
128 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
128 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
129 [[0], [11, 13, 15]]
129 [[0], [11, 13, 15]]
130 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
130 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
131 [[1, 2], [5, 8, 10, 11], [14]]
131 [[1, 2], [5, 8, 10, 11], [14]]
132
132
133 Slicing with a maximum chunk size
133 Slicing with a maximum chunk size
134 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
134 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
135 [[0], [11], [13], [15]]
135 [[0], [11], [13], [15]]
136 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
136 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
137 [[0], [11], [13, 15]]
137 [[0], [11], [13, 15]]
138
138
139 Slicing involving nullrev
139 Slicing involving nullrev
140 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
140 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
141 [[-1, 0], [11], [13, 15]]
141 [[-1, 0], [11], [13, 15]]
142 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
142 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
143 [[-1], [13], [15]]
143 [[-1], [13], [15]]
144 """
144 """
145 if targetsize is not None:
145 if targetsize is not None:
146 targetsize = max(targetsize, revlog.data_config.sr_min_gap_size)
146 targetsize = max(targetsize, revlog.data_config.sr_min_gap_size)
147 # targetsize should not be specified when evaluating delta candidates:
147 # targetsize should not be specified when evaluating delta candidates:
148 # * targetsize is used to ensure we stay within specification when reading.
148 # * targetsize is used to ensure we stay within specification when reading.
149 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
149 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
150 if densityslicing is None:
150 if densityslicing is None:
151 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
151 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
152 for chunk in densityslicing(
152 for chunk in densityslicing(
153 revs,
153 revs,
154 revlog.data_config.sr_density_threshold,
154 revlog.data_config.sr_density_threshold,
155 revlog.data_config.sr_min_gap_size,
155 revlog.data_config.sr_min_gap_size,
156 ):
156 ):
157 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
157 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
158 yield subchunk
158 yield subchunk
159
159
160
160
161 def _slicechunktosize(revlog, revs, targetsize=None):
161 def _slicechunktosize(revlog, revs, targetsize=None):
162 """slice revs to match the target size
162 """slice revs to match the target size
163
163
164 This is intended to be used on chunks that density slicing selected but that
164 This is intended to be used on chunks that density slicing selected but that
165 are still too large compared to the read guarantee of the revlog. This might
165 are still too large compared to the read guarantee of the revlog. This might
166 happen when the "minimal gap size" interrupted the slicing or when chains are
166 happen when the "minimal gap size" interrupted the slicing or when chains are
167 built in a way that creates large blocks next to each other.
167 built in a way that creates large blocks next to each other.
168
168
169 >>> data = [
169 >>> data = [
170 ... 3, #0 (3)
170 ... 3, #0 (3)
171 ... 5, #1 (2)
171 ... 5, #1 (2)
172 ... 6, #2 (1)
172 ... 6, #2 (1)
173 ... 8, #3 (2)
173 ... 8, #3 (2)
174 ... 8, #4 (empty)
174 ... 8, #4 (empty)
175 ... 11, #5 (3)
175 ... 11, #5 (3)
176 ... 12, #6 (1)
176 ... 12, #6 (1)
177 ... 13, #7 (1)
177 ... 13, #7 (1)
178 ... 14, #8 (1)
178 ... 14, #8 (1)
179 ... ]
179 ... ]
180
180
181 == All snapshots cases ==
181 == All snapshots cases ==
182 >>> revlog = _testrevlog(data, snapshot=range(9))
182 >>> revlog = _testrevlog(data, snapshot=range(9))
183
183
184 Cases where chunk is already small enough
184 Cases where chunk is already small enough
185 >>> list(_slicechunktosize(revlog, [0], 3))
185 >>> list(_slicechunktosize(revlog, [0], 3))
186 [[0]]
186 [[0]]
187 >>> list(_slicechunktosize(revlog, [6, 7], 3))
187 >>> list(_slicechunktosize(revlog, [6, 7], 3))
188 [[6, 7]]
188 [[6, 7]]
189 >>> list(_slicechunktosize(revlog, [0], None))
189 >>> list(_slicechunktosize(revlog, [0], None))
190 [[0]]
190 [[0]]
191 >>> list(_slicechunktosize(revlog, [6, 7], None))
191 >>> list(_slicechunktosize(revlog, [6, 7], None))
192 [[6, 7]]
192 [[6, 7]]
193
193
194 cases where we need actual slicing
194 cases where we need actual slicing
195 >>> list(_slicechunktosize(revlog, [0, 1], 3))
195 >>> list(_slicechunktosize(revlog, [0, 1], 3))
196 [[0], [1]]
196 [[0], [1]]
197 >>> list(_slicechunktosize(revlog, [1, 3], 3))
197 >>> list(_slicechunktosize(revlog, [1, 3], 3))
198 [[1], [3]]
198 [[1], [3]]
199 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
199 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
200 [[1, 2], [3]]
200 [[1, 2], [3]]
201 >>> list(_slicechunktosize(revlog, [3, 5], 3))
201 >>> list(_slicechunktosize(revlog, [3, 5], 3))
202 [[3], [5]]
202 [[3], [5]]
203 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
203 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
204 [[3], [5]]
204 [[3], [5]]
205 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
205 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
206 [[5], [6, 7, 8]]
206 [[5], [6, 7, 8]]
207 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
207 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
208 [[0], [1, 2], [3], [5], [6, 7, 8]]
208 [[0], [1, 2], [3], [5], [6, 7, 8]]
209
209
210 Case with too large individual chunk (must return valid chunk)
210 Case with too large individual chunk (must return valid chunk)
211 >>> list(_slicechunktosize(revlog, [0, 1], 2))
211 >>> list(_slicechunktosize(revlog, [0, 1], 2))
212 [[0], [1]]
212 [[0], [1]]
213 >>> list(_slicechunktosize(revlog, [1, 3], 1))
213 >>> list(_slicechunktosize(revlog, [1, 3], 1))
214 [[1], [3]]
214 [[1], [3]]
215 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
215 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
216 [[3], [5]]
216 [[3], [5]]
217
217
218 == No Snapshot cases ==
218 == No Snapshot cases ==
219 >>> revlog = _testrevlog(data)
219 >>> revlog = _testrevlog(data)
220
220
221 Cases where chunk is already small enough
221 Cases where chunk is already small enough
222 >>> list(_slicechunktosize(revlog, [0], 3))
222 >>> list(_slicechunktosize(revlog, [0], 3))
223 [[0]]
223 [[0]]
224 >>> list(_slicechunktosize(revlog, [6, 7], 3))
224 >>> list(_slicechunktosize(revlog, [6, 7], 3))
225 [[6, 7]]
225 [[6, 7]]
226 >>> list(_slicechunktosize(revlog, [0], None))
226 >>> list(_slicechunktosize(revlog, [0], None))
227 [[0]]
227 [[0]]
228 >>> list(_slicechunktosize(revlog, [6, 7], None))
228 >>> list(_slicechunktosize(revlog, [6, 7], None))
229 [[6, 7]]
229 [[6, 7]]
230
230
231 cases where we need actual slicing
231 cases where we need actual slicing
232 >>> list(_slicechunktosize(revlog, [0, 1], 3))
232 >>> list(_slicechunktosize(revlog, [0, 1], 3))
233 [[0], [1]]
233 [[0], [1]]
234 >>> list(_slicechunktosize(revlog, [1, 3], 3))
234 >>> list(_slicechunktosize(revlog, [1, 3], 3))
235 [[1], [3]]
235 [[1], [3]]
236 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
236 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
237 [[1], [2, 3]]
237 [[1], [2, 3]]
238 >>> list(_slicechunktosize(revlog, [3, 5], 3))
238 >>> list(_slicechunktosize(revlog, [3, 5], 3))
239 [[3], [5]]
239 [[3], [5]]
240 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
240 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
241 [[3], [4, 5]]
241 [[3], [4, 5]]
242 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
242 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
243 [[5], [6, 7, 8]]
243 [[5], [6, 7, 8]]
244 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
244 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
245 [[0], [1, 2], [3], [5], [6, 7, 8]]
245 [[0], [1, 2], [3], [5], [6, 7, 8]]
246
246
247 Case with too large individual chunk (must return valid chunk)
247 Case with too large individual chunk (must return valid chunk)
248 >>> list(_slicechunktosize(revlog, [0, 1], 2))
248 >>> list(_slicechunktosize(revlog, [0, 1], 2))
249 [[0], [1]]
249 [[0], [1]]
250 >>> list(_slicechunktosize(revlog, [1, 3], 1))
250 >>> list(_slicechunktosize(revlog, [1, 3], 1))
251 [[1], [3]]
251 [[1], [3]]
252 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
252 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
253 [[3], [5]]
253 [[3], [5]]
254
254
255 == mixed case ==
255 == mixed case ==
256 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
256 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
257 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
257 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
258 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
258 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
259 """
259 """
260 assert targetsize is None or 0 <= targetsize
260 assert targetsize is None or 0 <= targetsize
261 startdata = revlog.start(revs[0])
261 startdata = revlog.start(revs[0])
262 enddata = revlog.end(revs[-1])
262 enddata = revlog.end(revs[-1])
263 fullspan = enddata - startdata
263 fullspan = enddata - startdata
264 if targetsize is None or fullspan <= targetsize:
264 if targetsize is None or fullspan <= targetsize:
265 yield revs
265 yield revs
266 return
266 return
267
267
268 startrevidx = 0
268 startrevidx = 0
269 endrevidx = 1
269 endrevidx = 1
270 iterrevs = enumerate(revs)
270 iterrevs = enumerate(revs)
271 next(iterrevs) # skip first rev.
271 next(iterrevs) # skip first rev.
272 # first step: get snapshots out of the way
272 # first step: get snapshots out of the way
273 for idx, r in iterrevs:
273 for idx, r in iterrevs:
274 span = revlog.end(r) - startdata
274 span = revlog.end(r) - startdata
275 snapshot = revlog.issnapshot(r)
275 snapshot = revlog.issnapshot(r)
276 if span <= targetsize and snapshot:
276 if span <= targetsize and snapshot:
277 endrevidx = idx + 1
277 endrevidx = idx + 1
278 else:
278 else:
279 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
279 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
280 if chunk:
280 if chunk:
281 yield chunk
281 yield chunk
282 startrevidx = idx
282 startrevidx = idx
283 startdata = revlog.start(r)
283 startdata = revlog.start(r)
284 endrevidx = idx + 1
284 endrevidx = idx + 1
285 if not snapshot:
285 if not snapshot:
286 break
286 break
287
287
288 # for the others, we use binary slicing to quickly converge toward valid
288 # for the others, we use binary slicing to quickly converge toward valid
289 # chunks (otherwise, we might end up looking for start/end of many
289 # chunks (otherwise, we might end up looking for start/end of many
290 # revisions). This logic is not looking for the perfect slicing point, it
290 # revisions). This logic is not looking for the perfect slicing point, it
291 # focuses on quickly converging toward valid chunks.
291 # focuses on quickly converging toward valid chunks.
292 nbitem = len(revs)
292 nbitem = len(revs)
293 while (enddata - startdata) > targetsize:
293 while (enddata - startdata) > targetsize:
294 endrevidx = nbitem
294 endrevidx = nbitem
295 if nbitem - startrevidx <= 1:
295 if nbitem - startrevidx <= 1:
296 break # protect against individual chunk larger than limit
296 break # protect against individual chunk larger than limit
297 localenddata = revlog.end(revs[endrevidx - 1])
297 localenddata = revlog.end(revs[endrevidx - 1])
298 span = localenddata - startdata
298 span = localenddata - startdata
299 while span > targetsize:
299 while span > targetsize:
300 if endrevidx - startrevidx <= 1:
300 if endrevidx - startrevidx <= 1:
301 break # protect against individual chunk larger than limit
301 break # protect against individual chunk larger than limit
302 endrevidx -= (endrevidx - startrevidx) // 2
302 endrevidx -= (endrevidx - startrevidx) // 2
303 localenddata = revlog.end(revs[endrevidx - 1])
303 localenddata = revlog.end(revs[endrevidx - 1])
304 span = localenddata - startdata
304 span = localenddata - startdata
305 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
305 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
306 if chunk:
306 if chunk:
307 yield chunk
307 yield chunk
308 startrevidx = endrevidx
308 startrevidx = endrevidx
309 startdata = revlog.start(revs[startrevidx])
309 startdata = revlog.start(revs[startrevidx])
310
310
311 chunk = _trimchunk(revlog, revs, startrevidx)
311 chunk = _trimchunk(revlog, revs, startrevidx)
312 if chunk:
312 if chunk:
313 yield chunk
313 yield chunk
314
314
315
315
316 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
316 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
317 """slice revs to reduce the amount of unrelated data to be read from disk.
317 """slice revs to reduce the amount of unrelated data to be read from disk.
318
318
319 ``revs`` is sliced into groups that should be read in one go.
319 ``revs`` is sliced into groups that should be read in one go.
320 Assume that revs are sorted.
320 Assume that revs are sorted.
321
321
322 The initial chunk is sliced until the overall density (payload/chunks-span
322 The initial chunk is sliced until the overall density (payload/chunks-span
323 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
323 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
324 skipped.
324 skipped.
325
325
326 >>> revlog = _testrevlog([
326 >>> revlog = _testrevlog([
327 ... 5, #00 (5)
327 ... 5, #00 (5)
328 ... 10, #01 (5)
328 ... 10, #01 (5)
329 ... 12, #02 (2)
329 ... 12, #02 (2)
330 ... 12, #03 (empty)
330 ... 12, #03 (empty)
331 ... 27, #04 (15)
331 ... 27, #04 (15)
332 ... 31, #05 (4)
332 ... 31, #05 (4)
333 ... 31, #06 (empty)
333 ... 31, #06 (empty)
334 ... 42, #07 (11)
334 ... 42, #07 (11)
335 ... 47, #08 (5)
335 ... 47, #08 (5)
336 ... 47, #09 (empty)
336 ... 47, #09 (empty)
337 ... 48, #10 (1)
337 ... 48, #10 (1)
338 ... 51, #11 (3)
338 ... 51, #11 (3)
339 ... 74, #12 (23)
339 ... 74, #12 (23)
340 ... 85, #13 (11)
340 ... 85, #13 (11)
341 ... 86, #14 (1)
341 ... 86, #14 (1)
342 ... 91, #15 (5)
342 ... 91, #15 (5)
343 ... ])
343 ... ])
344
344
345 >>> list(_slicechunktodensity(revlog, list(range(16))))
345 >>> list(_slicechunktodensity(revlog, list(range(16))))
346 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
346 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
347 >>> list(_slicechunktodensity(revlog, [0, 15]))
347 >>> list(_slicechunktodensity(revlog, [0, 15]))
348 [[0], [15]]
348 [[0], [15]]
349 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
349 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
350 [[0], [11], [15]]
350 [[0], [11], [15]]
351 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
351 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
352 [[0], [11, 13, 15]]
352 [[0], [11, 13, 15]]
353 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
353 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
354 [[1, 2], [5, 8, 10, 11], [14]]
354 [[1, 2], [5, 8, 10, 11], [14]]
355 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
355 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
356 ... mingapsize=20))
356 ... mingapsize=20))
357 [[1, 2, 3, 5, 8, 10, 11], [14]]
357 [[1, 2, 3, 5, 8, 10, 11], [14]]
358 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
358 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
359 ... targetdensity=0.95))
359 ... targetdensity=0.95))
360 [[1, 2], [5], [8, 10, 11], [14]]
360 [[1, 2], [5], [8, 10, 11], [14]]
361 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
361 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
362 ... targetdensity=0.95, mingapsize=12))
362 ... targetdensity=0.95, mingapsize=12))
363 [[1, 2], [5, 8, 10, 11], [14]]
363 [[1, 2], [5, 8, 10, 11], [14]]
364 """
364 """
365 start = revlog.start
365 start = revlog.start
366 length = revlog.length
366 length = revlog.length
367
367
368 if len(revs) <= 1:
368 if len(revs) <= 1:
369 yield revs
369 yield revs
370 return
370 return
371
371
372 deltachainspan = segmentspan(revlog, revs)
372 deltachainspan = segmentspan(revlog, revs)
373
373
374 if deltachainspan < mingapsize:
374 if deltachainspan < mingapsize:
375 yield revs
375 yield revs
376 return
376 return
377
377
378 readdata = deltachainspan
378 readdata = deltachainspan
379 chainpayload = sum(length(r) for r in revs)
379 chainpayload = sum(length(r) for r in revs)
380
380
381 if deltachainspan:
381 if deltachainspan:
382 density = chainpayload / float(deltachainspan)
382 density = chainpayload / float(deltachainspan)
383 else:
383 else:
384 density = 1.0
384 density = 1.0
385
385
386 if density >= targetdensity:
386 if density >= targetdensity:
387 yield revs
387 yield revs
388 return
388 return
389
389
390 # Collect the gaps so they can be sorted and popped by decreasing size
390 # Collect the gaps so they can be sorted and popped by decreasing size
391 gaps = []
391 gaps = []
392 prevend = None
392 prevend = None
393 for i, rev in enumerate(revs):
393 for i, rev in enumerate(revs):
394 revstart = start(rev)
394 revstart = start(rev)
395 revlen = length(rev)
395 revlen = length(rev)
396
396
397 # Skip empty revisions to form larger holes
397 # Skip empty revisions to form larger holes
398 if revlen == 0:
398 if revlen == 0:
399 continue
399 continue
400
400
401 if prevend is not None:
401 if prevend is not None:
402 gapsize = revstart - prevend
402 gapsize = revstart - prevend
403 # only consider holes that are large enough
403 # only consider holes that are large enough
404 if gapsize > mingapsize:
404 if gapsize > mingapsize:
405 gaps.append((gapsize, i))
405 gaps.append((gapsize, i))
406
406
407 prevend = revstart + revlen
407 prevend = revstart + revlen
408 # sort the gaps so we can pop them from largest to smallest
408 # sort the gaps so we can pop them from largest to smallest
409 gaps.sort()
409 gaps.sort()
410
410
411 # Collect the indices of the largest holes until the density is acceptable
411 # Collect the indices of the largest holes until the density is acceptable
412 selected = []
412 selected = []
413 while gaps and density < targetdensity:
413 while gaps and density < targetdensity:
414 gapsize, gapidx = gaps.pop()
414 gapsize, gapidx = gaps.pop()
415
415
416 selected.append(gapidx)
416 selected.append(gapidx)
417
417
418 # the gaps list is sorted in ascending order, so pop() returns
418 # the gaps list is sorted in ascending order, so pop() returns
419 # the largest remaining gap first
419 # the largest remaining gap first
420 readdata -= gapsize
420 readdata -= gapsize
421 if readdata > 0:
421 if readdata > 0:
422 density = chainpayload / float(readdata)
422 density = chainpayload / float(readdata)
423 else:
423 else:
424 density = 1.0
424 density = 1.0
425 selected.sort()
425 selected.sort()
426
426
427 # Cut the revs at collected indices
427 # Cut the revs at collected indices
428 previdx = 0
428 previdx = 0
429 for idx in selected:
429 for idx in selected:
430
430
431 chunk = _trimchunk(revlog, revs, previdx, idx)
431 chunk = _trimchunk(revlog, revs, previdx, idx)
432 if chunk:
432 if chunk:
433 yield chunk
433 yield chunk
434
434
435 previdx = idx
435 previdx = idx
436
436
437 chunk = _trimchunk(revlog, revs, previdx)
437 chunk = _trimchunk(revlog, revs, previdx)
438 if chunk:
438 if chunk:
439 yield chunk
439 yield chunk
440
440
441
441
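To make the density computation concrete, a worked example with illustrative numbers (0.5 is the default `targetdensity` above):

    # revisions spanning bytes [0, 100) with only 40 bytes of payload
    deltachainspan = 100
    chainpayload = 40
    density = chainpayload / float(deltachainspan)  # 0.4
    # 0.4 < 0.5, so the largest gaps are dropped until the
    # remaining span reads at a density of at least 0.5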
442 def _trimchunk(revlog, revs, startidx, endidx=None):
442 def _trimchunk(revlog, revs, startidx, endidx=None):
443 """returns revs[startidx:endidx] without empty trailing revs
443 """returns revs[startidx:endidx] without empty trailing revs
444
444
445 Doctest Setup
445 Doctest Setup
446 >>> revlog = _testrevlog([
446 >>> revlog = _testrevlog([
447 ... 5, #0
447 ... 5, #0
448 ... 10, #1
448 ... 10, #1
449 ... 12, #2
449 ... 12, #2
450 ... 12, #3 (empty)
450 ... 12, #3 (empty)
451 ... 17, #4
451 ... 17, #4
452 ... 21, #5
452 ... 21, #5
453 ... 21, #6 (empty)
453 ... 21, #6 (empty)
454 ... ])
454 ... ])
455
455
456 Contiguous cases:
456 Contiguous cases:
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
458 [0, 1, 2, 3, 4, 5]
458 [0, 1, 2, 3, 4, 5]
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
460 [0, 1, 2, 3, 4]
460 [0, 1, 2, 3, 4]
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
462 [0, 1, 2]
462 [0, 1, 2]
463 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
463 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
464 [2]
464 [2]
465 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
465 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
466 [3, 4, 5]
466 [3, 4, 5]
467 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
467 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
468 [3, 4]
468 [3, 4]
469
469
470 Discontiguous cases:
470 Discontiguous cases:
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
472 [1, 3, 5]
472 [1, 3, 5]
473 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
473 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
474 [1]
474 [1]
475 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
475 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
476 [3, 5]
476 [3, 5]
477 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
477 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
478 [3, 5]
478 [3, 5]
479 """
479 """
480 length = revlog.length
480 length = revlog.length
481
481
482 if endidx is None:
482 if endidx is None:
483 endidx = len(revs)
483 endidx = len(revs)
484
484
485 # If we have a non-empty delta candidate, there is nothing to trim
485 # If we have a non-empty delta candidate, there is nothing to trim
486 if revs[endidx - 1] < len(revlog):
486 if revs[endidx - 1] < len(revlog):
487 # Trim empty revs at the end, except the very first revision of a chain
487 # Trim empty revs at the end, except the very first revision of a chain
488 while (
488 while (
489 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
489 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
490 ):
490 ):
491 endidx -= 1
491 endidx -= 1
492
492
493 return revs[startidx:endidx]
493 return revs[startidx:endidx]
494
494
495
495
496 def segmentspan(revlog, revs):
496 def segmentspan(revlog, revs):
497 """Get the byte span of a segment of revisions
497 """Get the byte span of a segment of revisions
498
498
499 revs is a sorted array of revision numbers
499 revs is a sorted array of revision numbers
500
500
501 >>> revlog = _testrevlog([
501 >>> revlog = _testrevlog([
502 ... 5, #0
502 ... 5, #0
503 ... 10, #1
503 ... 10, #1
504 ... 12, #2
504 ... 12, #2
505 ... 12, #3 (empty)
505 ... 12, #3 (empty)
506 ... 17, #4
506 ... 17, #4
507 ... ])
507 ... ])
508
508
509 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
509 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
510 17
510 17
511 >>> segmentspan(revlog, [0, 4])
511 >>> segmentspan(revlog, [0, 4])
512 17
512 17
513 >>> segmentspan(revlog, [3, 4])
513 >>> segmentspan(revlog, [3, 4])
514 5
514 5
515 >>> segmentspan(revlog, [1, 2, 3,])
515 >>> segmentspan(revlog, [1, 2, 3,])
516 7
516 7
517 >>> segmentspan(revlog, [1, 3])
517 >>> segmentspan(revlog, [1, 3])
518 7
518 7
519 """
519 """
520 if not revs:
520 if not revs:
521 return 0
521 return 0
522 end = revlog.end(revs[-1])
522 end = revlog.end(revs[-1])
523 return end - revlog.start(revs[0])
523 return end - revlog.start(revs[0])
524
524
525
525
526 def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):
526 def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):
527 """build full text from a (base, delta) pair and other metadata"""
527 """build full text from a (base, delta) pair and other metadata"""
528 # special case deltas which replace entire base; no need to decode
528 # special case deltas which replace entire base; no need to decode
529 # base revision. this neatly avoids censored bases, which throw when
529 # base revision. this neatly avoids censored bases, which throw when
530 # they're decoded.
530 # they're decoded.
531 hlen = struct.calcsize(b">lll")
531 hlen = struct.calcsize(b">lll")
532 if delta[:hlen] == mdiff.replacediffheader(
532 if delta[:hlen] == mdiff.replacediffheader(
533 revlog.rawsize(baserev), len(delta) - hlen
533 revlog.rawsize(baserev), len(delta) - hlen
534 ):
534 ):
535 fulltext = delta[hlen:]
535 fulltext = delta[hlen:]
536 else:
536 else:
537 # deltabase is rawtext before being changed by flag processors, which is
537 # deltabase is rawtext before being changed by flag processors, which is
538 # equivalent to non-raw text
538 # equivalent to non-raw text
539 basetext = revlog.revision(baserev)
539 basetext = revlog.revision(baserev)
540 fulltext = mdiff.patch(basetext, delta)
540 fulltext = mdiff.patch(basetext, delta)
541
541
542 try:
542 try:
543 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
543 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
544 if validatehash:
544 if validatehash:
545 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
545 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
546 if flags & REVIDX_ISCENSORED:
546 if flags & REVIDX_ISCENSORED:
547 raise error.StorageError(
547 raise error.StorageError(
548 _(b'node %s is not censored') % expectednode
548 _(b'node %s is not censored') % expectednode
549 )
549 )
550 except error.CensoredNodeError:
550 except error.CensoredNodeError:
551 # must pass the censored index flag to add censored revisions
551 # must pass the censored index flag to add censored revisions
552 if not flags & REVIDX_ISCENSORED:
552 if not flags & REVIDX_ISCENSORED:
553 raise
553 raise
554 return fulltext
554 return fulltext
555
555
556
556
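The `replacediffheader` special case in `_textfromdelta` relies on the bdiff hunk layout: a `>lll` header (start, end, length of new data) followed by the replacement bytes, so a delta replacing the whole base is a single hunk covering `[0, rawsize(base))`. A hedged sketch of building such a delta by hand, assuming that layout:

    import struct

    def full_replacement_delta(base_len, newtext):
        # one hunk: replace bytes [0, base_len) with len(newtext) new bytes
        header = struct.pack(b'>lll', 0, base_len, len(newtext))
        return header + newtext

    delta = full_replacement_delta(11, b'brand new text')
    # struct.calcsize(b'>lll') == 12, so the full text is delta[12:]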
557 @attr.s(slots=True, frozen=True)
557 @attr.s(slots=True, frozen=True)
558 class _deltainfo:
558 class _deltainfo:
559 distance = attr.ib()
559 distance = attr.ib()
560 deltalen = attr.ib()
560 deltalen = attr.ib()
561 data = attr.ib()
561 data = attr.ib()
562 base = attr.ib()
562 base = attr.ib()
563 chainbase = attr.ib()
563 chainbase = attr.ib()
564 chainlen = attr.ib()
564 chainlen = attr.ib()
565 compresseddeltalen = attr.ib()
565 compresseddeltalen = attr.ib()
566 snapshotdepth = attr.ib()
566 snapshotdepth = attr.ib()
567
567
568
568
569 def drop_u_compression(delta):
569 def drop_u_compression(delta):
570 """turn into a "u" (no-compression) into no-compression without header
570 """turn into a "u" (no-compression) into no-compression without header
571
571
572 This is useful for revlog formats that have a better compression method.
572 This is useful for revlog formats that have a better compression method.
573 """
573 """
574 assert delta.data[0] == b'u', delta.data[0]
574 assert delta.data[0] == b'u', delta.data[0]
575 return _deltainfo(
575 return _deltainfo(
576 delta.distance,
576 delta.distance,
577 delta.deltalen - 1,
577 delta.deltalen - 1,
578 (b'', delta.data[1]),
578 (b'', delta.data[1]),
579 delta.base,
579 delta.base,
580 delta.chainbase,
580 delta.chainbase,
581 delta.chainlen,
581 delta.chainlen,
582 delta.compresseddeltalen,
582 delta.compresseddeltalen,
583 delta.snapshotdepth,
583 delta.snapshotdepth,
584 )
584 )
585
585
586
586
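A small usage sketch with a toy `_deltainfo`; all numbers are illustrative:

    raw = b'some delta payload'
    info = _deltainfo(
        distance=10,
        deltalen=1 + len(raw),  # includes the one-byte b'u' header
        data=(b'u', raw),
        base=3,
        chainbase=0,
        chainlen=2,
        compresseddeltalen=1 + len(raw),
        snapshotdepth=None,
    )
    stripped = drop_u_compression(info)
    assert stripped.data == (b'', raw)
    assert stripped.deltalen == len(raw)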
587 def is_good_delta_info(revlog, deltainfo, revinfo):
587 def is_good_delta_info(revlog, deltainfo, revinfo):
588 """Returns True if the given delta is good. Good means that it is within
588 """Returns True if the given delta is good. Good means that it is within
589 the disk span, disk size, and chain length bounds that we know to be
589 the disk span, disk size, and chain length bounds that we know to be
590 performant."""
590 performant."""
591 if deltainfo is None:
591 if deltainfo is None:
592 return False
592 return False
593
593
594 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
594 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
595 # we should never end up asking such a question. Adding the assert as a
595 # we should never end up asking such a question. Adding the assert as a
596 # safe-guard to detect anything that would be fishy in this regard.
596 # safe-guard to detect anything that would be fishy in this regard.
597 assert (
597 assert (
598 revinfo.cachedelta is None
598 revinfo.cachedelta is None
599 or revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
599 or revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
600 or not revlog.delta_config.general_delta
600 or not revlog.delta_config.general_delta
601 )
601 )
602
602
603 # - 'deltainfo.distance' is the distance from the base revision --
603 # - 'deltainfo.distance' is the distance from the base revision --
604 # bounding it limits the amount of I/O we need to do.
604 # bounding it limits the amount of I/O we need to do.
605 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
605 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
606 # deltas we need to apply -- bounding it limits the amount of CPU
606 # deltas we need to apply -- bounding it limits the amount of CPU
607 # we consume.
607 # we consume.
608
608
609 textlen = revinfo.textlen
609 textlen = revinfo.textlen
610 defaultmax = textlen * 4
610 defaultmax = textlen * 4
611 maxdist = revlog.delta_config.max_deltachain_span
611 maxdist = revlog.delta_config.max_deltachain_span
612 if not maxdist:
612 if not maxdist:
613 maxdist = deltainfo.distance # ensure the conditional passes
613 maxdist = deltainfo.distance # ensure the conditional passes
614 maxdist = max(maxdist, defaultmax)
614 maxdist = max(maxdist, defaultmax)
615
615
616 # Bad delta from read span:
616 # Bad delta from read span:
617 #
617 #
618 # If the span of data read is larger than the maximum allowed.
618 # If the span of data read is larger than the maximum allowed.
619 #
619 #
620 # In the sparse-revlog case, we rely on the associated "sparse reading"
620 # In the sparse-revlog case, we rely on the associated "sparse reading"
621 # to avoid issues related to the span of data. In theory, it would be
621 # to avoid issues related to the span of data. In theory, it would be
622 # possible to build a pathological revlog where delta patterns would lead
622 # possible to build a pathological revlog where delta patterns would lead
623 # to too many reads. However, this does not happen in practice at all. So
623 # to too many reads. However, this does not happen in practice at all. So
624 # we skip the span check entirely.
624 # we skip the span check entirely.
625 if not revlog.delta_config.sparse_revlog and maxdist < deltainfo.distance:
625 if not revlog.delta_config.sparse_revlog and maxdist < deltainfo.distance:
626 return False
626 return False
627
627
628 # Bad delta from new delta size:
628 # Bad delta from new delta size:
629 #
629 #
630 # If the delta size is larger than the target text, storing the
630 # If the delta size is larger than the target text, storing the
631 # delta will be inefficient.
631 # delta will be inefficient.
632 if textlen < deltainfo.deltalen:
632 if textlen < deltainfo.deltalen:
633 return False
633 return False
634
634
635 # Bad delta from cumulated payload size:
635 # Bad delta from cumulated payload size:
636 #
636 #
637 # If the sum of deltas gets larger than K * the target text length.
637 # If the sum of deltas gets larger than K * the target text length.
638 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
638 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
639 return False
639 return False
640
640
641 # Bad delta from chain length:
641 # Bad delta from chain length:
642 #
642 #
643 # If the number of deltas in the chain gets too high.
643 # If the number of deltas in the chain gets too high.
644 if (
644 if (
645 revlog.delta_config.max_chain_len
645 revlog.delta_config.max_chain_len
646 and revlog.delta_config.max_chain_len < deltainfo.chainlen
646 and revlog.delta_config.max_chain_len < deltainfo.chainlen
647 ):
647 ):
648 return False
648 return False
649
649
650 # bad delta from intermediate snapshot size limit
650 # bad delta from intermediate snapshot size limit
651 #
651 #
652 # If an intermediate snapshot size is higher than the limit, the
652 # If an intermediate snapshot size is higher than the limit, the
653 # delta is bad. The limit exists to prevent endless chains of
653 # delta is bad. The limit exists to prevent endless chains of
654 # intermediate deltas from being created.
654 # intermediate deltas from being created.
655 if (
655 if (
656 deltainfo.snapshotdepth is not None
656 deltainfo.snapshotdepth is not None
657 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
657 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
658 ):
658 ):
659 return False
659 return False
660
660
661 # bad delta if new intermediate snapshot is larger than the previous
661 # bad delta if new intermediate snapshot is larger than the previous
662 # snapshot
662 # snapshot
663 if (
663 if (
664 deltainfo.snapshotdepth
664 deltainfo.snapshotdepth
665 and revlog.length(deltainfo.base) < deltainfo.deltalen
665 and revlog.length(deltainfo.base) < deltainfo.deltalen
666 ):
666 ):
667 return False
667 return False
668
668
669 return True
669 return True
670
670
671
671
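To make the bounds checked by `is_good_delta_info` concrete, a worked example with illustrative numbers (only `LIMIT_DELTA2TEXT` comes from the module above; everything else is made up):

    textlen = 1000
    # size check: a 1200-byte delta for a 1000-byte text is refused
    assert textlen < 1200
    # cumulated payload check: 2500 > LIMIT_DELTA2TEXT * 1000, refused
    assert textlen * LIMIT_DELTA2TEXT < 2500
    # snapshot budget at depth 3: deltas above 125 bytes are refused
    assert (textlen >> 3) == 125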
672 # If a revision's full text is that much bigger than a base candidate full
672 # If a revision's full text is that much bigger than a base candidate full
673 # text's, it is very unlikely that it will produce a valid delta. We no longer
673 # text's, it is very unlikely that it will produce a valid delta. We no longer
674 # consider these candidates.
674 # consider these candidates.
675 LIMIT_BASE2TEXT = 500
675 LIMIT_BASE2TEXT = 500
676
676
677
677
678 def _candidategroups(
678 def _candidategroups(
679 revlog,
679 revlog,
680 textlen,
680 textlen,
681 p1,
681 p1,
682 p2,
682 p2,
683 cachedelta,
683 cachedelta,
684 excluded_bases=None,
684 excluded_bases=None,
685 target_rev=None,
685 target_rev=None,
686 snapshot_cache=None,
686 snapshot_cache=None,
687 ):
687 ):
688 """Provides group of revision to be tested as delta base
688 """Provides group of revision to be tested as delta base
689
689
690 This top level function focuses on emitting groups with unique and worthwhile
690 This top level function focuses on emitting groups with unique and worthwhile
691 content. See _raw_candidate_groups for details about the group order.
691 content. See _raw_candidate_groups for details about the group order.
692 """
692 """
693 # should we try to build a delta?
693 # should we try to build a delta?
694 if not (len(revlog) and revlog._storedeltachains):
694 if not (len(revlog) and revlog._storedeltachains):
695 yield None
695 yield None
696 return
696 return
697
697
698 if target_rev is None:
698 if target_rev is None:
699 target_rev = len(revlog)
699 target_rev = len(revlog)
700
700
701 if not revlog.delta_config.general_delta:
701 if not revlog.delta_config.general_delta:
702 # before general delta, there is only one possible delta base
702 # before general delta, there is only one possible delta base
703 yield (target_rev - 1,)
703 yield (target_rev - 1,)
704 yield None
704 yield None
705 return
705 return
706
706
707 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
707 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
708 # we should never end up asking such a question. Adding the assert as a
708 # we should never end up asking such a question. Adding the assert as a
709 # safe-guard to detect anything that would be fishy in this regard.
709 # safe-guard to detect anything that would be fishy in this regard.
710 assert (
710 assert (
711 cachedelta is None
711 cachedelta is None
712 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
712 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
713 or not revlog.delta_config.general_delta
713 or not revlog.delta_config.general_delta
714 )
714 )
715
715
716 deltalength = revlog.length
716 deltalength = revlog.length
717 deltaparent = revlog.deltaparent
717 deltaparent = revlog.deltaparent
718 sparse = revlog.delta_config.sparse_revlog
718 sparse = revlog.delta_config.sparse_revlog
719 good = None
719 good = None
720
720
721 deltas_limit = textlen * LIMIT_DELTA2TEXT
721 deltas_limit = textlen * LIMIT_DELTA2TEXT
722 group_chunk_size = revlog.delta_config.candidate_group_chunk_size
722 group_chunk_size = revlog.delta_config.candidate_group_chunk_size
723
723
724 tested = {nullrev}
724 tested = {nullrev}
725 candidates = _refinedgroups(
725 candidates = _refinedgroups(
726 revlog,
726 revlog,
727 p1,
727 p1,
728 p2,
728 p2,
729 cachedelta,
729 cachedelta,
730 snapshot_cache=snapshot_cache,
730 snapshot_cache=snapshot_cache,
731 )
731 )
732 while True:
732 while True:
733 temptative = candidates.send(good)
733 temptative = candidates.send(good)
734 if temptative is None:
734 if temptative is None:
735 break
735 break
736 group = []
736 group = []
737 for rev in temptative:
737 for rev in temptative:
738 # skip over empty deltas (no need to include them in a chain)
738 # skip over empty deltas (no need to include them in a chain)
739 while not (rev == nullrev or rev in tested or deltalength(rev)):
739 while not (rev == nullrev or rev in tested or deltalength(rev)):
740 tested.add(rev)
740 tested.add(rev)
741 rev = deltaparent(rev)
741 rev = deltaparent(rev)
742 # no need to try a delta against nullrev, this will be done as a
742 # no need to try a delta against nullrev, this will be done as a
743 # last resort.
743 # last resort.
744 if rev == nullrev:
744 if rev == nullrev:
745 continue
745 continue
746 # filter out revisions we already tested
746 # filter out revisions we already tested
747 if rev in tested:
747 if rev in tested:
748 continue
748 continue
749
749
750 # a higher authority deemed the base unworthy (e.g. censored)
750 # a higher authority deemed the base unworthy (e.g. censored)
751 if excluded_bases is not None and rev in excluded_bases:
751 if excluded_bases is not None and rev in excluded_bases:
752 tested.add(rev)
752 tested.add(rev)
753 continue
753 continue
754 # We are in some recomputation cases where that rev is too high in
754 # We are in some recomputation cases where that rev is too high in
755 # the revlog
755 # the revlog
756 if target_rev is not None and rev >= target_rev:
756 if target_rev is not None and rev >= target_rev:
757 tested.add(rev)
757 tested.add(rev)
758 continue
758 continue
759 # filter out delta base that will never produce good delta
759 # filter out delta base that will never produce good delta
760 if deltas_limit < revlog.length(rev):
760 if deltas_limit < revlog.length(rev):
761 tested.add(rev)
761 tested.add(rev)
762 continue
762 continue
763 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
763 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
764 tested.add(rev)
764 tested.add(rev)
765 continue
765 continue
766 # no delta for rawtext-changing revs (see "candelta" for why)
766 # no delta for rawtext-changing revs (see "candelta" for why)
767 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
767 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
768 tested.add(rev)
768 tested.add(rev)
769 continue
769 continue
770
770
771 # If we reach here, we are about to build and test a delta.
771 # If we reach here, we are about to build and test a delta.
772 # The delta building process will compute the chaininfo in all
772 # The delta building process will compute the chaininfo in all
773 # case, since that computation is cached, it is fine to access it
773 # case, since that computation is cached, it is fine to access it
774 # here too.
774 # here too.
775 chainlen, chainsize = revlog._chaininfo(rev)
775 chainlen, chainsize = revlog._chaininfo(rev)
776 # if chain will be too long, skip base
776 # if chain will be too long, skip base
777 if (
777 if (
778 revlog.delta_config.max_chain_len
778 revlog.delta_config.max_chain_len
779 and chainlen >= revlog.delta_config.max_chain_len
779 and chainlen >= revlog.delta_config.max_chain_len
780 ):
780 ):
781 tested.add(rev)
781 tested.add(rev)
782 continue
782 continue
783 # if chain already have too much data, skip base
783 # if chain already have too much data, skip base
784 if deltas_limit < chainsize:
784 if deltas_limit < chainsize:
785 tested.add(rev)
785 tested.add(rev)
786 continue
786 continue
787 if sparse and revlog.upperboundcomp is not None:
787 if sparse and revlog.delta_config.upper_bound_comp is not None:
788 maxcomp = revlog.upperboundcomp
788 maxcomp = revlog.delta_config.upper_bound_comp
789 basenotsnap = (p1, p2, nullrev)
789 basenotsnap = (p1, p2, nullrev)
790 if rev not in basenotsnap and revlog.issnapshot(rev):
790 if rev not in basenotsnap and revlog.issnapshot(rev):
791 snapshotdepth = revlog.snapshotdepth(rev)
791 snapshotdepth = revlog.snapshotdepth(rev)
792 # If text is significantly larger than the base, we can
792 # If text is significantly larger than the base, we can
793 # expect the resulting delta to be proportional to the size
793 # expect the resulting delta to be proportional to the size
794 # difference
794 # difference
795 revsize = revlog.rawsize(rev)
795 revsize = revlog.rawsize(rev)
796 rawsizedistance = max(textlen - revsize, 0)
796 rawsizedistance = max(textlen - revsize, 0)
797 # use an estimate of the compression upper bound.
797 # use an estimate of the compression upper bound.
798 lowestrealisticdeltalen = rawsizedistance // maxcomp
798 lowestrealisticdeltalen = rawsizedistance // maxcomp
799
799
800 # check the absolute constraint on the delta size
800 # check the absolute constraint on the delta size
801 snapshotlimit = textlen >> snapshotdepth
801 snapshotlimit = textlen >> snapshotdepth
802 if snapshotlimit < lowestrealisticdeltalen:
802 if snapshotlimit < lowestrealisticdeltalen:
803 # delta lower bound is larger than accepted upper bound
803 # delta lower bound is larger than accepted upper bound
804 tested.add(rev)
804 tested.add(rev)
805 continue
805 continue
806
806
807 # check the relative constraint on the delta size
807 # check the relative constraint on the delta size
808 revlength = revlog.length(rev)
808 revlength = revlog.length(rev)
809 if revlength < lowestrealisticdeltalen:
809 if revlength < lowestrealisticdeltalen:
810 # delta probable lower bound is larger than target base
810 # delta probable lower bound is larger than target base
811 tested.add(rev)
811 tested.add(rev)
812 continue
812 continue
813
813
814 group.append(rev)
814 group.append(rev)
815 if group:
815 if group:
816 # When the size of the candidate group is big, it can result in a
816 # When the size of the candidate group is big, it can result in a
817 # quite significant performance impact. To reduce this, we can send
817 # quite significant performance impact. To reduce this, we can send
818 # them in smaller batches until the new batch does not provide any
818 # them in smaller batches until the new batch does not provide any
819 # improvements.
819 # improvements.
820 #
820 #
821 # This might reduce the overall efficiency of the compression in
821 # This might reduce the overall efficiency of the compression in
822 # some corner cases, but that should also prevent very pathological
822 # some corner cases, but that should also prevent very pathological
823 # cases from being an issue. (eg. 20 000 candidates).
823 # cases from being an issue. (eg. 20 000 candidates).
824 #
824 #
825 # XXX note that the ordering of the group becomes important as it
825 # XXX note that the ordering of the group becomes important as it
826 # now impacts the final result. The current order is unprocessed
826 # now impacts the final result. The current order is unprocessed
827 # and can be improved.
827 # and can be improved.
828 if group_chunk_size == 0:
828 if group_chunk_size == 0:
829 tested.update(group)
829 tested.update(group)
830 good = yield tuple(group)
830 good = yield tuple(group)
831 else:
831 else:
832 prev_good = good
832 prev_good = good
833 for start in range(0, len(group), group_chunk_size):
833 for start in range(0, len(group), group_chunk_size):
834 sub_group = group[start : start + group_chunk_size]
834 sub_group = group[start : start + group_chunk_size]
835 tested.update(sub_group)
835 tested.update(sub_group)
836 good = yield tuple(sub_group)
836 good = yield tuple(sub_group)
837 if prev_good == good:
837 if prev_good == good:
838 break
838 break
839
839
840 yield None
840 yield None
841
841
842
842
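# Illustrative sketch (not revlog API): `_candidategroups` above is driven
# through the generator `send()` protocol -- the caller sends back the best
# base found so far (or None), and the generator uses that feedback to pick
# the next batch of candidates. A minimal, self-contained model of that
# dialogue, with hypothetical names:


def _example_feedback_producer():
    """Yield candidate batches, refining around whatever the caller liked."""
    good = yield (3, 7)  # first batch of candidate "revisions"
    while good is not None and good > 0:
        good = yield (good - 1,)  # refine around the accepted candidate
    yield None  # exhausted, mirroring the final ``yield None`` above


def _example_feedback_consumer():
    producer = _example_feedback_producer()
    candidates = next(producer)
    best = None
    while candidates is not None:
        best = min(candidates)  # pretend the smallest candidate always wins
        candidates = producer.send(best)
    return best  # 0, after walking 3 -> 2 -> 1 -> 0
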
def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse of the source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    debug_info = None
    if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        if debug_info is not None:
            debug_info['cached-delta.tested'] += 1
        good = yield (cachedelta[0],)
        if good is not None:
            if debug_info is not None:
                debug_info['cached-delta.accepted'] += 1
            yield None
            return
    if snapshot_cache is None:
        snapshot_cache = SnapshotCache()
    groups = _rawgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshot_cache,
    )
    for candidates in groups:
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog.del_config.sparse_revlog if False else not revlog.delta_config.sparse_revlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up
        if not snapshot_cache.snapshots:
            snapshot_cache.update(revlog, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
            good = yield children

    if debug_info is not None:
        if good is None:
            debug_info['no-solution'] += 1

    yield None

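# Illustrative sketch (not revlog API): the "refine snapshot down/up" loops
# above walk the snapshot tree around an accepted base. The hypothetical
# helper below models one pass of that walk over plain dicts, ignoring the
# consumer feedback that the real generator reacts to:


def _example_refine_offers(deltaparent_map, children_map, good):
    """List the bases a refinement pass could offer around ``good``."""
    offers = []
    # refine down: offer each ancestor snapshot, one at a time
    base = deltaparent_map.get(good)
    while base is not None:
        offers.append((base,))
        base = deltaparent_map.get(base)
    # refine up: offer the (sorted) children of the accepted snapshot
    offers.append(tuple(sorted(children_map.get(good, ()))))
    return offers
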
def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
    """Provides groups of revisions to be tested as delta bases

    This lower level function focuses on emitting deltas that are
    theoretically interesting, without looking into any practical details.

    The group order aims at providing fast or small candidates first.
    """
    # Why search for a delta base if we cannot use a delta base?
    assert revlog.delta_config.general_delta
    # also see issue6056
    sparse = revlog.delta_config.sparse_revlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    # exclude already lazily tested bases, if any
    parents = [p for p in (p1, p2) if p != nullrev]

    if not revlog.delta_config.delta_both_parents and len(parents) == 2:
        parents.sort()
        # To minimize the chance of having to build a fulltext,
        # pick first whichever parent is closest to us (max rev)
        yield (parents[1],)
        # then the other one (min rev) if the first did not fit
        yield (parents[0],)
    elif len(parents) > 0:
        # Test all parents (1 or 2), and keep the best candidate
        yield parents

    if sparse and parents:
        if snapshot_cache is None:
            # map: base-rev: [snapshot-revs]
            snapshot_cache = SnapshotCache()
        # See if we can use an existing snapshot in the parent chains as
        # a base for a new intermediate snapshot
        #
        # search for snapshots in the parents' delta chains
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        snapshot_cache.update(revlog, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revisions increases the odds
        # that we reuse an independent, yet better, snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshots that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for depth, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < depth:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[depth].add(s)
        # Test them as possible intermediate snapshot bases.
        # We test them from highest to lowest level. High level ones are more
        # likely to result in small deltas.
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshot_cache.snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, child of a base we consider, would be
            # suitable.
            #
            # It gives a chance to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains,
            # creating more and more snapshots as the repository grows.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before have less chance
                # to be valid bases since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the bases from our parents' delta chains.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chains; search whether any full
        # snapshot emitted since the parents' base would be a suitable base
        # for an intermediate snapshot.
        #
        # It gives a chance to reuse a delta chain unrelated to the current
        # revision instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains, creating
        # more and more snapshots as the repository grows.
        full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
        yield tuple(sorted(full))

    if not sparse:
        # other approaches failed; try against prev to hopefully save us a
        # fulltext.
        yield (prev,)

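# For reference, on a sparse, general-delta revlog the groups above come out
# in this order (a summary of the code, not an API guarantee):
#
#   1. the closest parent, then the other one (or both at once when
#      delta-both-parents testing is enabled),
#   2. per snapshot level found in the parents' chains, deepest first:
#      existing sibling snapshots, then the parents' own snapshot bases,
#   3. full snapshots emitted since the parents' base,
#   4. and, for non-sparse revlogs only, `prev` as a last attempt to avoid
#      storing a fulltext.
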
class SnapshotCache:
    __slots__ = ('snapshots', '_start_rev', '_end_rev')

    def __init__(self):
        self.snapshots = collections.defaultdict(set)
        self._start_rev = None
        self._end_rev = None

    def update(self, revlog, start_rev=0):
        """find snapshots from start_rev to tip"""
        nb_revs = len(revlog)
        end_rev = nb_revs - 1
        if start_rev > end_rev:
            return  # range is empty

        if self._start_rev is None:
            assert self._end_rev is None
            self._update(revlog, start_rev, end_rev)
        elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
            if start_rev < self._start_rev:
                self._update(revlog, start_rev, self._start_rev - 1)
            if self._end_rev < end_rev:
                self._update(revlog, self._end_rev + 1, end_rev)

        if self._start_rev is None:
            assert self._end_rev is None
            self._end_rev = end_rev
            self._start_rev = start_rev
        else:
            self._start_rev = min(self._start_rev, start_rev)
            self._end_rev = max(self._end_rev, end_rev)
        assert self._start_rev <= self._end_rev, (
            self._start_rev,
            self._end_rev,
        )

    def _update(self, revlog, start_rev, end_rev):
        """internal method that actually does the content update"""
        assert self._start_rev is None or (
            start_rev < self._start_rev or start_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        assert self._start_rev is None or (
            end_rev < self._start_rev or end_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        cache = self.snapshots
        if hasattr(revlog.index, 'findsnapshots'):
            revlog.index.findsnapshots(cache, start_rev, end_rev)
        else:
            deltaparent = revlog.deltaparent
            issnapshot = revlog.issnapshot
            for rev in revlog.revs(start_rev, end_rev):
                if issnapshot(rev):
                    cache[deltaparent(rev)].add(rev)


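# Illustrative sketch of how SnapshotCache extends its covered interval:
# ``update()`` only scans revisions outside [_start_rev, _end_rev], so
# repeated calls stay cheap. The stub below stands in for a real revlog
# (hypothetical, for demonstration only):


class _ExampleRevlogStub:
    """Five revisions; even ones are snapshots based on the previous rev."""

    def __len__(self):
        return 5

    def revs(self, start, stop):
        return range(start, stop + 1)  # inclusive bounds, like revlog.revs

    def issnapshot(self, rev):
        return rev % 2 == 0

    def deltaparent(self, rev):
        return rev - 1

    index = object()  # no `findsnapshots`, forces the pure-Python path


def _example_snapshot_cache():
    cache = SnapshotCache()
    cache.update(_ExampleRevlogStub())  # scans revs 0..4 exactly once
    # cache.snapshots now maps {-1: {0}, 1: {2}, 3: {4}}
    return cache.snapshots
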
class deltacomputer:
    def __init__(
        self,
        revlog,
        write_debug=None,
        debug_search=False,
        debug_info=None,
    ):
        self.revlog = revlog
        self._write_debug = write_debug
        if write_debug is None:
            self._debug_search = False
        else:
            self._debug_search = debug_search
        self._debug_info = debug_info
        self._snapshot_cache = SnapshotCache()

    @property
    def _gather_debug(self):
        return self._write_debug is not None or self._debug_info is not None

    def buildtext(self, revinfo):
        """Builds a fulltext version of a revision

        revinfo: revisioninfo instance that contains all needed info
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        fulltext = btext[0] = _textfromdelta(
            revlog,
            baserev,
            delta,
            revinfo.p1,
            revinfo.p2,
            revinfo.flags,
            revinfo.node,
        )
        return fulltext

    def _builddeltadiff(self, base, revinfo):
        revlog = self.revlog
        t = self.buildtext(revinfo)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so the delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.rawdata(base)
            delta = mdiff.textdiff(ptext, t)

        return delta

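    # Illustrative note: a delta over a censored base cannot rely on the
    # base's bytes, so ``_builddeltadiff`` above emits one patch hunk that
    # replaces the whole base text. Assuming the bdiff patch encoding of
    # three big-endian int32 fields (start, end, new-length), replacing a
    # 10-byte base with b'fresh' looks like:
    #
    #     header = struct.pack(b">lll", 0, 10, 5)
    #     delta = header + b'fresh'
    #
    # and applies cleanly whatever the censored base contained.
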
    def _builddeltainfo(self, revinfo, base, target_rev=None):
        # can we use the cached delta?
        revlog = self.revlog
        chainbase = revlog.chainbase(base)
        if revlog.delta_config.general_delta:
            deltabase = base
        else:
            if target_rev is not None and base != target_rev - 1:
                msg = (
                    b'general delta cannot use delta for something else '
                    b'than `prev`: %d<-%d'
                )
                msg %= (base, target_rev)
                raise error.ProgrammingError(msg)
            deltabase = chainbase
        snapshotdepth = None
        if revlog.delta_config.sparse_revlog and deltabase == nullrev:
            snapshotdepth = 0
        elif revlog.delta_config.sparse_revlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])
        delta = None
        if revinfo.cachedelta:
            cachebase = revinfo.cachedelta[0]
            # check if the diff still applies
            currentbase = cachebase
            while (
                currentbase != nullrev
                and currentbase != base
                and self.revlog.length(currentbase) == 0
            ):
                currentbase = self.revlog.deltaparent(currentbase)
            if self.revlog.delta_config.lazy_delta and currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo)
        if self._debug_search:
            msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
            msg %= len(delta)
            self._write_debug(msg)
        # snapshotdepth needs to be neither None nor a 0-level snapshot
-        if revlog.upperboundcomp is not None and snapshotdepth:
-            lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
+        if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
+            lowestrealisticdeltalen = (
+                len(delta) // revlog.delta_config.upper_bound_comp
+            )
            snapshotlimit = revinfo.textlen >> snapshotdepth
            if self._debug_search:
                msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
                msg %= lowestrealisticdeltalen
                self._write_debug(msg)
            if snapshotlimit < lowestrealisticdeltalen:
                if self._debug_search:
                    msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
                    self._write_debug(msg)
                return None
            if revlog.length(base) < lowestrealisticdeltalen:
                if self._debug_search:
                    msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
                    self._write_debug(msg)
                return None
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        return _deltainfo(
            dist,
            deltalen,
            (header, data),
            deltabase,
            chainbase,
            chainlen,
            compresseddeltalen,
            snapshotdepth,
        )

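    # Worked example of the snapshot pruning above (numbers hypothetical):
    # with ``upper_bound_comp = 10``, a 5000-byte uncompressed delta has a
    # realistic compressed lower bound of 5000 // 10 = 500 bytes. For a
    # depth-2 snapshot of a 1600-byte fulltext the absolute budget is
    # 1600 >> 2 = 400 bytes; since 400 < 500 the candidate is discarded
    # before any compression work is spent.
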
    def _fullsnapshotinfo(self, revinfo, curr):
        rawtext = self.buildtext(revinfo)
        data = self.revlog.compress(rawtext)
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(
            dist,
            deltalen,
            data,
            deltabase,
            chainbase,
            chainlen,
            compresseddeltalen,
            snapshotdepth,
        )

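    # Note: for a full snapshot the ``_deltainfo`` fields collapse -- the
    # "delta" is the compressed fulltext itself, so ``dist``, ``deltalen``
    # and ``compresseddeltalen`` are all the stored size, the revision is
    # its own ``deltabase``/``chainbase``, the chain has length 1, and
    # ``snapshotdepth`` is 0 (a root snapshot).
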
    def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.

        `excluded_bases` is an optional set of revisions that cannot be used
        as delta bases. Use this to recompute deltas suitable in a censor or
        strip context.
        """
        if target_rev is None:
            target_rev = len(self.revlog)

        gather_debug = self._gather_debug
        cachedelta = revinfo.cachedelta
        revlog = self.revlog
        p1r = p2r = None

        if excluded_bases is None:
            excluded_bases = set()

        if gather_debug:
            start = util.timer()
            dbg = self._one_dbg_data()
            dbg['revision'] = target_rev
            target_revlog = b"UNKNOWN"
            target_type = self.revlog.target[0]
            target_key = self.revlog.target[1]
            if target_type == KIND_CHANGELOG:
                target_revlog = b'CHANGELOG:'
            elif target_type == KIND_MANIFESTLOG:
                target_revlog = b'MANIFESTLOG:'
                if target_key:
                    target_revlog += b'%s:' % target_key
            elif target_type == KIND_FILELOG:
                target_revlog = b'FILELOG:'
                if target_key:
                    target_revlog += b'%s:' % target_key
            dbg['target-revlog'] = target_revlog
            p1r = revlog.rev(revinfo.p1)
            p2r = revlog.rev(revinfo.p2)
            if p1r != nullrev:
                p1_chain_len = revlog._chaininfo(p1r)[0]
            else:
                p1_chain_len = -1
            if p2r != nullrev:
                p2_chain_len = revlog._chaininfo(p2r)[0]
            else:
                p2_chain_len = -1
            dbg['p1-chain-len'] = p1_chain_len
            dbg['p2-chain-len'] = p2_chain_len

        # 1) if the revision is empty, no amount of delta can beat it
        #
        # 2) no delta for flag processor revisions (see "candelta" for why)
        # not calling candelta since only one revision needs testing, also to
        # avoid the overhead of fetching flags again.
        if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
            if gather_debug:
                end = util.timer()
                dbg['duration'] = end - start
                dbg[
                    'delta-base'
                ] = deltainfo.base  # pytype: disable=attribute-error
                dbg['search_round_count'] = 0
                dbg['using-cached-base'] = False
                dbg['delta_try_count'] = 0
                dbg['type'] = b"full"
                dbg['snapshot-depth'] = 0
                self._dbg_process_data(dbg)
            return deltainfo

        deltainfo = None

        # If this source delta is to be forcibly reused, let us comply early.
        if (
            revlog.delta_config.general_delta
            and revinfo.cachedelta is not None
            and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
        ):
            base = revinfo.cachedelta[0]
            if base == nullrev:
                dbg_type = b"full"
                deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
                if gather_debug:
                    snapshotdepth = 0
            elif base not in excluded_bases:
                delta = revinfo.cachedelta[1]
                header, data = revlog.compress(delta)
                deltalen = len(header) + len(data)
                if gather_debug:
                    offset = revlog.end(len(revlog) - 1)
                    chainbase = revlog.chainbase(base)
                    distance = deltalen + offset - revlog.start(chainbase)
                    chainlen, compresseddeltalen = revlog._chaininfo(base)
                    chainlen += 1
                    compresseddeltalen += deltalen
                    if base == p1r or base == p2r:
                        dbg_type = b"delta"
                        snapshotdepth = None
                    elif not revlog.issnapshot(base):
                        snapshotdepth = None
                    else:
                        dbg_type = b"snapshot"
                        snapshotdepth = revlog.snapshotdepth(base) + 1
                else:
                    distance = None
                    chainbase = None
                    chainlen = None
                    compresseddeltalen = None
                    snapshotdepth = None
                deltainfo = _deltainfo(
                    distance=distance,
                    deltalen=deltalen,
                    data=(header, data),
                    base=base,
                    chainbase=chainbase,
                    chainlen=chainlen,
                    compresseddeltalen=compresseddeltalen,
                    snapshotdepth=snapshotdepth,
                )

            if deltainfo is not None:
                if gather_debug:
                    end = util.timer()
                    dbg['duration'] = end - start
                    dbg[
                        'delta-base'
                    ] = deltainfo.base  # pytype: disable=attribute-error
                    dbg['search_round_count'] = 0
                    dbg['using-cached-base'] = True
                    dbg['delta_try_count'] = 0
                    dbg['type'] = b"full"
                    if snapshotdepth is None:
                        dbg['snapshot-depth'] = 0
                    else:
                        dbg['snapshot-depth'] = snapshotdepth
                    self._dbg_process_data(dbg)
                return deltainfo

        # count the number of different deltas we tried (for debug purposes)
        dbg_try_count = 0
        # count the number of "search rounds" we did (for debug purposes)
        dbg_try_rounds = 0
        dbg_type = b'unknown'

        if p1r is None:
            p1r = revlog.rev(revinfo.p1)
            p2r = revlog.rev(revinfo.p2)

        if self._debug_search:
            msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
            msg %= target_rev
            self._write_debug(msg)

        groups = _candidategroups(
            self.revlog,
            revinfo.textlen,
            p1r,
            p2r,
            cachedelta,
            excluded_bases,
            target_rev,
            snapshot_cache=self._snapshot_cache,
        )
        candidaterevs = next(groups)
        while candidaterevs is not None:
            dbg_try_rounds += 1
            if self._debug_search:
                prev = None
                if deltainfo is not None:
                    prev = deltainfo.base

                if (
                    cachedelta is not None
                    and len(candidaterevs) == 1
                    and cachedelta[0] in candidaterevs
                ):
                    round_type = b"cached-delta"
                elif p1r in candidaterevs or p2r in candidaterevs:
                    round_type = b"parents"
                elif prev is not None and all(c < prev for c in candidaterevs):
                    round_type = b"refine-down"
                elif prev is not None and all(c > prev for c in candidaterevs):
                    round_type = b"refine-up"
                else:
                    round_type = b"search-down"
                msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
                msg %= (dbg_try_rounds, len(candidaterevs), round_type)
                self._write_debug(msg)
            nominateddeltas = []
            if deltainfo is not None:
                if self._debug_search:
                    msg = (
                        b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
                    )
                    msg %= (deltainfo.base, deltainfo.deltalen)
                    self._write_debug(msg)
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                if self._debug_search:
                    msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
                    msg %= candidaterev
                    self._write_debug(msg)
                    candidate_type = None
                    if candidaterev == p1r:
                        candidate_type = b"p1"
                    elif candidaterev == p2r:
                        candidate_type = b"p2"
                    elif self.revlog.issnapshot(candidaterev):
                        candidate_type = b"snapshot-%d"
                        candidate_type %= self.revlog.snapshotdepth(
                            candidaterev
                        )

                    if candidate_type is not None:
                        msg = b"DBG-DELTAS-SEARCH: type=%s\n"
                        msg %= candidate_type
                        self._write_debug(msg)
                    msg = b"DBG-DELTAS-SEARCH: size=%d\n"
                    msg %= self.revlog.length(candidaterev)
                    self._write_debug(msg)
                    msg = b"DBG-DELTAS-SEARCH: base=%d\n"
                    msg %= self.revlog.deltaparent(candidaterev)
                    self._write_debug(msg)

                dbg_try_count += 1

                if self._debug_search:
                    delta_start = util.timer()
                candidatedelta = self._builddeltainfo(
                    revinfo,
                    candidaterev,
                    target_rev=target_rev,
                )
                if self._debug_search:
                    delta_end = util.timer()
                    msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
                    msg %= delta_end - delta_start
                    self._write_debug(msg)
                if candidatedelta is not None:
                    if is_good_delta_info(self.revlog, candidatedelta, revinfo):
                        if self._debug_search:
                            msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
                            msg %= candidatedelta.deltalen
                            self._write_debug(msg)
                        nominateddeltas.append(candidatedelta)
                    elif self._debug_search:
                        msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
                        msg %= candidatedelta.deltalen
                        self._write_debug(msg)
                elif self._debug_search:
                    msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
                    self._write_debug(msg)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            dbg_type = b"full"
            deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
        elif deltainfo.snapshotdepth:  # pytype: disable=attribute-error
            dbg_type = b"snapshot"
        else:
            dbg_type = b"delta"

        if gather_debug:
            end = util.timer()
            if dbg_type == b'full':
                used_cached = (
                    cachedelta is not None
                    and dbg_try_rounds == 0
                    and dbg_try_count == 0
                    and cachedelta[0] == nullrev
                )
            else:
                used_cached = (
                    cachedelta is not None
                    and dbg_try_rounds == 1
                    and dbg_try_count == 1
                    and deltainfo.base == cachedelta[0]
                )
            dbg['duration'] = end - start
            dbg[
                'delta-base'
            ] = deltainfo.base  # pytype: disable=attribute-error
            dbg['search_round_count'] = dbg_try_rounds
            dbg['using-cached-base'] = used_cached
            dbg['delta_try_count'] = dbg_try_count
            dbg['type'] = dbg_type
            if (
                deltainfo.snapshotdepth  # pytype: disable=attribute-error
                is not None
            ):
                dbg[
                    'snapshot-depth'
                ] = deltainfo.snapshotdepth  # pytype: disable=attribute-error
            else:
                dbg['snapshot-depth'] = 0
            self._dbg_process_data(dbg)
        return deltainfo

    def _one_dbg_data(self):
        return {
            'duration': None,
            'revision': None,
            'delta-base': None,
            'search_round_count': None,
            'using-cached-base': None,
            'delta_try_count': None,
            'type': None,
            'p1-chain-len': None,
            'p2-chain-len': None,
            'snapshot-depth': None,
            'target-revlog': None,
        }

    def _dbg_process_data(self, dbg):
        if self._debug_info is not None:
            self._debug_info.append(dbg)

        if self._write_debug is not None:
            msg = (
                b"DBG-DELTAS:"
                b" %-12s"
                b" rev=%d:"
                b" delta-base=%d"
                b" is-cached=%d"
                b" - search-rounds=%d"
                b" try-count=%d"
                b" - delta-type=%-6s"
                b" snap-depth=%d"
                b" - p1-chain-length=%d"
                b" p2-chain-length=%d"
                b" - duration=%f"
                b"\n"
            )
            msg %= (
                dbg["target-revlog"],
                dbg["revision"],
                dbg["delta-base"],
                dbg["using-cached-base"],
                dbg["search_round_count"],
                dbg["delta_try_count"],
                dbg["type"],
                dbg["snapshot-depth"],
                dbg["p1-chain-len"],
                dbg["p2-chain-len"],
                dbg["duration"],
            )
            self._write_debug(msg)


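# For illustration, a line produced by the format in `_dbg_process_data`
# could look like this (all values hypothetical, wrapped here for
# readability; the real message is a single line):
#
#   DBG-DELTAS: FILELOG:a.txt: rev=1042: delta-base=1038 is-cached=0
#       - search-rounds=2 try-count=3 - delta-type=snapshot snap-depth=2
#       - p1-chain-length=12 p2-chain-length=-1 - duration=0.000412
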
def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ format to dispatch between PLAIN and DEFAULT
    compression.
    """
    h, d = deltainfo.data
    compression_mode = COMP_MODE_INLINE
    if not h and not d:
        # no data to store at all... declare it uncompressed
        compression_mode = COMP_MODE_PLAIN
    elif not h:
        t = d[0:1]
        if t == b'\0':
            compression_mode = COMP_MODE_PLAIN
        elif t == default_compression_header:
            compression_mode = COMP_MODE_DEFAULT
    elif h == b'u':
        # we have a more efficient way to declare uncompressed
        h = b''
        compression_mode = COMP_MODE_PLAIN
        deltainfo = drop_u_compression(deltainfo)
    return compression_mode, deltainfo
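

# Illustrative sketch of the dispatch above, with a stand-in deltainfo and
# b'x' as the hypothetical default compression header (the usual first byte
# of a zlib stream):


def _example_delta_compression_modes():
    class _FakeDeltaInfo:
        def __init__(self, data):
            self.data = data

    cases = [
        (b'', b''),  # nothing stored -> COMP_MODE_PLAIN
        (b'', b'\0raw-bytes'),  # stored plain -> COMP_MODE_PLAIN
        (b'', b'x\x9c____'),  # default engine -> COMP_MODE_DEFAULT
        (b'H', b'payload'),  # explicit header -> COMP_MODE_INLINE
    ]
    return [
        delta_compression(b'x', _FakeDeltaInfo(data))[0] for data in cases
    ]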