delta-computer: stop explicitly taking file handle...
marmoute
r51913:509f0f7f default
@@ -1,3556 +1,3551 b''
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import io
import os
import struct
import weakref
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if hasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'


class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

    _flagserrorclass = error.RevlogError

    @staticmethod
    def is_inline_index(header_bytes):
        """Determine if a revlog is inline from the initial bytes of the index"""
        header = INDEX_HEADER.unpack(header_bytes)[0]

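        # The 32-bit index header packs the revlog format flags in the high
        # 16 bits and the revlog version number in the low 16 bits.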
        _format_flags = header & ~0xFFFF
        _format_version = header & 0xFFFF

        features = FEATURES_BY_VERSION[_format_version]
        return features[b'inline'](_format_flags)

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        try_split=False,
        canonical_parent_order=True,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the revlog
        is about without having to resort to heuristics and index filename
        analysis. Note that this must reliably be set by normal code, but
        that test, debug, or performance measurement code might not set this to
        an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self._try_split = try_split
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self._candidate_group_chunk_size = 0
        self._debug_delta = False
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # other optional features

        # might remove rank configuration once the computation has no impact
        self._compute_rank = False

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

        # parent order is supposed to be semantically irrelevant, so we
        # normally resort parents to ensure that the first parent is non-null,
        # if there is a non-null parent at all.
        # filelog abuses the parent order as flag to mark some instances of
        # meta-encoded files, so allow it to disable this behavior.
        self.canonical_parent_order = canonical_parent_order

    def _init_opts(self):
        """process options (from above/config) to set up the associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
            self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
        if dps_cgds:
            self._candidate_group_chunk_size = dps_cgds
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'debug-delta' in opts:
            self._debug_delta = opts[b'debug-delta']
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''

    def get_streams(self, max_linkrev, force_inline=False):
        """return a list of streams that represent this revlog

        This is used by stream-clone to do bytes-to-bytes copies of a repository.

        This streams data for all revisions that refer to a changelog revision up
        to `max_linkrev`.

        If `force_inline` is set, it enforces that the stream will represent an inline revlog.

        It returns a list of three-tuples:

        [
            (filename, bytes_stream, stream_size),
            …
        ]
        """
        n = len(self)
        index = self.index
        while n > 0:
            linkrev = index[n - 1][4]
            if linkrev < max_linkrev:
                break
            # note: this loop will rarely go through multiple iterations, since
            # it only traverses commits created during the current streaming
            # pull operation.
            #
            # If this becomes a problem, using a binary search should cap the
            # runtime of this.
            n = n - 1
        if n == 0:
            # no data to send
            return []
        index_size = n * index.entry_size
        data_size = self.end(n - 1)

        # XXX we might have been split (or stripped) since the object
        # initialization. We need to close this race too, e.g. by having a way
        # to pre-open the files we feed to the revlog and never closing them
        # before we are done streaming.

        if self._inline:

            def get_stream():
                with self._indexfp() as fp:
                    yield None
                    size = index_size + data_size
                    if size <= 65536:
                        yield fp.read(size)
                    else:
                        yield from util.filechunkiter(fp, limit=size)

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        elif force_inline:

            def get_stream():
                with self.reading():
                    yield None

                    for rev in range(n):
                        idx = self.index.entry_binary(rev)
                        if rev == 0 and self._docket is None:
                            # re-inject the inline flag
                            header = self._format_flags
                            header |= self._format_version
                            header |= FLAG_INLINE_DATA
                            header = self.index.pack_header(header)
                            idx = header + idx
                        yield idx
                        yield self._getsegmentforrevs(rev, rev)[1]

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        else:

            def get_index_stream():
                with self._indexfp() as fp:
                    yield None
                    if index_size <= 65536:
                        yield fp.read(index_size)
                    else:
                        yield from util.filechunkiter(fp, limit=index_size)

            def get_data_stream():
                with self._datafp() as fp:
                    yield None
                    if data_size <= 65536:
                        yield fp.read(data_size)
                    else:
                        yield from util.filechunkiter(fp, limit=data_size)

            index_stream = get_index_stream()
            next(index_stream)
            data_stream = get_data_stream()
            next(data_stream)
            return [
                (self._datafile, data_stream, data_size),
                (self._indexfile, index_stream, index_size),
            ]

    def _loadindex(self, docket=None):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        elif self._try_split and self.opener.exists(self._split_index_file):
            entry_point = self._split_index_file
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self._generaldelta = features[b'generaldelta'](self._format_flags)
            self.hassidedata = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and hasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self._indexfile if self._inline else self._datafile),
            self._chunkcachesize,
            chunkcache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self._sidedatafile,
            self._chunkcachesize,
        )
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    def get_revlog(self):
        """simple function to mirror the API of other not-really-revlog APIs"""
        return self

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        if self.revlog_kind == KIND_FILELOG:
            # Reference the file without the "data/" prefix, so it is familiar
            # to the user.
            return self.target[1]
        else:
            return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly; use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except FileNotFoundError:
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _sidedatareadfp(self):
        """file object suitable to read sidedata"""
865 if self._writinghandles:
865 if self._writinghandles:
866 yield self._writinghandles[2]
866 yield self._writinghandles[2]
867 else:
867 else:
868 with self.opener(self._sidedatafile) as fp:
868 with self.opener(self._sidedatafile) as fp:
869 yield fp
869 yield fp
870
870
871 def tiprev(self):
871 def tiprev(self):
872 return len(self.index) - 1
872 return len(self.index) - 1
873
873
874 def tip(self):
874 def tip(self):
875 return self.node(self.tiprev())
875 return self.node(self.tiprev())
876
876
877 def __contains__(self, rev):
877 def __contains__(self, rev):
878 return 0 <= rev < len(self)
878 return 0 <= rev < len(self)
879
879
880 def __len__(self):
880 def __len__(self):
881 return len(self.index)
881 return len(self.index)
882
882
883 def __iter__(self):
883 def __iter__(self):
884 return iter(range(len(self)))
884 return iter(range(len(self)))
885
885
886 def revs(self, start=0, stop=None):
886 def revs(self, start=0, stop=None):
887 """iterate over all rev in this revlog (from start to stop)"""
887 """iterate over all rev in this revlog (from start to stop)"""
888 return storageutil.iterrevs(len(self), start=start, stop=stop)
888 return storageutil.iterrevs(len(self), start=start, stop=stop)
889
889
890 def hasnode(self, node):
890 def hasnode(self, node):
891 try:
891 try:
892 self.rev(node)
892 self.rev(node)
893 return True
893 return True
894 except KeyError:
894 except KeyError:
895 return False
895 return False
896
896
897 def _candelta(self, baserev, rev):
897 def _candelta(self, baserev, rev):
898 """whether two revisions (baserev, rev) can be delta-ed or not"""
898 """whether two revisions (baserev, rev) can be delta-ed or not"""
899 # Disable delta if either rev requires a content-changing flag
899 # Disable delta if either rev requires a content-changing flag
900 # processor (ex. LFS). This is because such flag processor can alter
900 # processor (ex. LFS). This is because such flag processor can alter
901 # the rawtext content that the delta will be based on, and two clients
901 # the rawtext content that the delta will be based on, and two clients
902 # could have a same revlog node with different flags (i.e. different
902 # could have a same revlog node with different flags (i.e. different
903 # rawtext contents) and the delta could be incompatible.
903 # rawtext contents) and the delta could be incompatible.
904 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
904 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
905 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
905 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
906 ):
906 ):
907 return False
907 return False
908 return True
908 return True
909
909
910 def update_caches(self, transaction):
910 def update_caches(self, transaction):
911 """update on disk cache
911 """update on disk cache
912
912
913 If a transaction is passed, the update may be delayed to transaction
913 If a transaction is passed, the update may be delayed to transaction
914 commit."""
914 commit."""
915 if self._nodemap_file is not None:
915 if self._nodemap_file is not None:
916 if transaction is None:
916 if transaction is None:
917 nodemaputil.update_persistent_nodemap(self)
917 nodemaputil.update_persistent_nodemap(self)
918 else:
918 else:
919 nodemaputil.setup_persistent_nodemap(transaction, self)
919 nodemaputil.setup_persistent_nodemap(transaction, self)
920
920
921 def clearcaches(self):
921 def clearcaches(self):
922 """Clear in-memory caches"""
922 """Clear in-memory caches"""
923 self._revisioncache = None
923 self._revisioncache = None
924 self._chainbasecache.clear()
924 self._chainbasecache.clear()
925 self._segmentfile.clear_cache()
925 self._segmentfile.clear_cache()
926 self._segmentfile_sidedata.clear_cache()
926 self._segmentfile_sidedata.clear_cache()
927 self._pcache = {}
927 self._pcache = {}
928 self._nodemap_docket = None
928 self._nodemap_docket = None
929 self.index.clearcaches()
929 self.index.clearcaches()
930 # The python code is the one responsible for validating the docket, so we
930 # The python code is the one responsible for validating the docket, so we
931 # end up having to refresh it here.
931 # end up having to refresh it here.
932 use_nodemap = (
932 use_nodemap = (
933 not self._inline
933 not self._inline
934 and self._nodemap_file is not None
934 and self._nodemap_file is not None
935 and hasattr(self.index, 'update_nodemap_data')
935 and hasattr(self.index, 'update_nodemap_data')
936 )
936 )
937 if use_nodemap:
937 if use_nodemap:
938 nodemap_data = nodemaputil.persisted_data(self)
938 nodemap_data = nodemaputil.persisted_data(self)
939 if nodemap_data is not None:
939 if nodemap_data is not None:
940 self._nodemap_docket = nodemap_data[0]
940 self._nodemap_docket = nodemap_data[0]
941 self.index.update_nodemap_data(*nodemap_data)
941 self.index.update_nodemap_data(*nodemap_data)
942
942
943 def rev(self, node):
943 def rev(self, node):
944 """return the revision number associated with a <nodeid>"""
944 """return the revision number associated with a <nodeid>"""
945 try:
945 try:
946 return self.index.rev(node)
946 return self.index.rev(node)
947 except TypeError:
947 except TypeError:
948 raise
948 raise
949 except error.RevlogError:
949 except error.RevlogError:
950 # parsers.c radix tree lookup failed
950 # parsers.c radix tree lookup failed
951 if (
951 if (
952 node == self.nodeconstants.wdirid
952 node == self.nodeconstants.wdirid
953 or node in self.nodeconstants.wdirfilenodeids
953 or node in self.nodeconstants.wdirfilenodeids
954 ):
954 ):
955 raise error.WdirUnsupported
955 raise error.WdirUnsupported
956 raise error.LookupError(node, self.display_id, _(b'no node'))
956 raise error.LookupError(node, self.display_id, _(b'no node'))
957
957
958 # Accessors for index entries.
958 # Accessors for index entries.
959
959
960 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
960 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
961 # are flags.
961 # are flags.
962 def start(self, rev):
962 def start(self, rev):
963 return int(self.index[rev][0] >> 16)
963 return int(self.index[rev][0] >> 16)
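# A note on the packing (sketch, not normative): the first index field
# stores ``(offset << 16) | flags`` in a single integer, so ``start()``
# above recovers the 6-byte data offset with a right shift and ``flags()``
# below masks off the low 16 bits.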
964
964
965 def sidedata_cut_off(self, rev):
965 def sidedata_cut_off(self, rev):
966 sd_cut_off = self.index[rev][8]
966 sd_cut_off = self.index[rev][8]
967 if sd_cut_off != 0:
967 if sd_cut_off != 0:
968 return sd_cut_off
968 return sd_cut_off
969 # This is some annoying dance, because entries without sidedata
969 # This is some annoying dance, because entries without sidedata
970 # currently use 0 as their offset. (instead of previous-offset +
970 # currently use 0 as their offset. (instead of previous-offset +
971 # previous-size)
971 # previous-size)
972 #
972 #
973 # We should reconsider this sidedata → 0 sidedata_offset policy.
973 # We should reconsider this sidedata → 0 sidedata_offset policy.
974 # In the meantime, we need this.
974 # In the meantime, we need this.
975 while 0 <= rev:
975 while 0 <= rev:
976 e = self.index[rev]
976 e = self.index[rev]
977 if e[9] != 0:
977 if e[9] != 0:
978 return e[8] + e[9]
978 return e[8] + e[9]
979 rev -= 1
979 rev -= 1
980 return 0
980 return 0
981
981
982 def flags(self, rev):
982 def flags(self, rev):
983 return self.index[rev][0] & 0xFFFF
983 return self.index[rev][0] & 0xFFFF
984
984
985 def length(self, rev):
985 def length(self, rev):
986 return self.index[rev][1]
986 return self.index[rev][1]
987
987
988 def sidedata_length(self, rev):
988 def sidedata_length(self, rev):
989 if not self.hassidedata:
989 if not self.hassidedata:
990 return 0
990 return 0
991 return self.index[rev][9]
991 return self.index[rev][9]
992
992
993 def rawsize(self, rev):
993 def rawsize(self, rev):
994 """return the length of the uncompressed text for a given revision"""
994 """return the length of the uncompressed text for a given revision"""
995 l = self.index[rev][2]
995 l = self.index[rev][2]
996 if l >= 0:
996 if l >= 0:
997 return l
997 return l
998
998
999 t = self.rawdata(rev)
999 t = self.rawdata(rev)
1000 return len(t)
1000 return len(t)
1001
1001
1002 def size(self, rev):
1002 def size(self, rev):
1003 """length of non-raw text (processed by a "read" flag processor)"""
1003 """length of non-raw text (processed by a "read" flag processor)"""
1004 # fast path: if no "read" flag processor could change the content,
1004 # fast path: if no "read" flag processor could change the content,
1005 # size is rawsize. note: ELLIPSIS is known to not change the content.
1005 # size is rawsize. note: ELLIPSIS is known to not change the content.
1006 flags = self.flags(rev)
1006 flags = self.flags(rev)
1007 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1007 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1008 return self.rawsize(rev)
1008 return self.rawsize(rev)
1009
1009
1010 return len(self.revision(rev))
1010 return len(self.revision(rev))
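# Illustrative note (an assumption about typical flag usage): for a
# revision with no content-changing flags, ``size(rev)`` is simply the
# ``rawsize`` field from the index, while e.g. an extstored (LFS)
# revision has to be run through ``revision()`` so the read-side flag
# processors can produce the logical text before it is measured.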
1011
1011
1012 def fast_rank(self, rev):
1012 def fast_rank(self, rev):
1013 """Return the rank of a revision if already known, or None otherwise.
1013 """Return the rank of a revision if already known, or None otherwise.
1014
1014
1015 The rank of a revision is the size of the sub-graph it defines as a
1015 The rank of a revision is the size of the sub-graph it defines as a
1016 head. Equivalently, the rank of a revision `r` is the size of the set
1016 head. Equivalently, the rank of a revision `r` is the size of the set
1017 `ancestors(r)`, `r` included.
1017 `ancestors(r)`, `r` included.
1018
1018
1019 This method returns the rank retrieved from the revlog in constant
1019 This method returns the rank retrieved from the revlog in constant
1020 time. It makes no attempt at computing unknown values for versions of
1020 time. It makes no attempt at computing unknown values for versions of
1021 the revlog which do not persist the rank.
1021 the revlog which do not persist the rank.
1022 """
1022 """
1023 rank = self.index[rev][ENTRY_RANK]
1023 rank = self.index[rev][ENTRY_RANK]
1024 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1024 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1025 return None
1025 return None
1026 if rev == nullrev:
1026 if rev == nullrev:
1027 return 0 # convention
1027 return 0 # convention
1028 return rank
1028 return rank
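# Worked example (hypothetical): in a linear history 0 <- 1 <- 2 stored in
# a changelog-v2 revlog that persists ranks, ``fast_rank(2)`` would return
# 3 (revisions 0, 1 and 2), while on an older format it returns None.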
1029
1029
1030 def chainbase(self, rev):
1030 def chainbase(self, rev):
1031 base = self._chainbasecache.get(rev)
1031 base = self._chainbasecache.get(rev)
1032 if base is not None:
1032 if base is not None:
1033 return base
1033 return base
1034
1034
1035 index = self.index
1035 index = self.index
1036 iterrev = rev
1036 iterrev = rev
1037 base = index[iterrev][3]
1037 base = index[iterrev][3]
1038 while base != iterrev:
1038 while base != iterrev:
1039 iterrev = base
1039 iterrev = base
1040 base = index[iterrev][3]
1040 base = index[iterrev][3]
1041
1041
1042 self._chainbasecache[rev] = base
1042 self._chainbasecache[rev] = base
1043 return base
1043 return base
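# Sketch of what the loop above does: index field 3 is the delta base, and
# a revision whose base is itself is a full snapshot. For hypothetical
# bases 5 <- 6 <- 7 (7 stored as a delta against 6, 6 against 5, 5 stored
# in full), ``chainbase(7)`` walks back and returns 5.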
1044
1044
1045 def linkrev(self, rev):
1045 def linkrev(self, rev):
1046 return self.index[rev][4]
1046 return self.index[rev][4]
1047
1047
1048 def parentrevs(self, rev):
1048 def parentrevs(self, rev):
1049 try:
1049 try:
1050 entry = self.index[rev]
1050 entry = self.index[rev]
1051 except IndexError:
1051 except IndexError:
1052 if rev == wdirrev:
1052 if rev == wdirrev:
1053 raise error.WdirUnsupported
1053 raise error.WdirUnsupported
1054 raise
1054 raise
1055
1055
1056 if self.canonical_parent_order and entry[5] == nullrev:
1056 if self.canonical_parent_order and entry[5] == nullrev:
1057 return entry[6], entry[5]
1057 return entry[6], entry[5]
1058 else:
1058 else:
1059 return entry[5], entry[6]
1059 return entry[5], entry[6]
1060
1060
1061 # fast parentrevs(rev) where rev isn't filtered
1061 # fast parentrevs(rev) where rev isn't filtered
1062 _uncheckedparentrevs = parentrevs
1062 _uncheckedparentrevs = parentrevs
1063
1063
1064 def node(self, rev):
1064 def node(self, rev):
1065 try:
1065 try:
1066 return self.index[rev][7]
1066 return self.index[rev][7]
1067 except IndexError:
1067 except IndexError:
1068 if rev == wdirrev:
1068 if rev == wdirrev:
1069 raise error.WdirUnsupported
1069 raise error.WdirUnsupported
1070 raise
1070 raise
1071
1071
1072 # Derived from index values.
1072 # Derived from index values.
1073
1073
1074 def end(self, rev):
1074 def end(self, rev):
1075 return self.start(rev) + self.length(rev)
1075 return self.start(rev) + self.length(rev)
1076
1076
1077 def parents(self, node):
1077 def parents(self, node):
1078 i = self.index
1078 i = self.index
1079 d = i[self.rev(node)]
1079 d = i[self.rev(node)]
1080 # inline node() to avoid function call overhead
1080 # inline node() to avoid function call overhead
1081 if self.canonical_parent_order and d[5] == self.nullid:
1081 if self.canonical_parent_order and d[5] == self.nullid:
1082 return i[d[6]][7], i[d[5]][7]
1082 return i[d[6]][7], i[d[5]][7]
1083 else:
1083 else:
1084 return i[d[5]][7], i[d[6]][7]
1084 return i[d[5]][7], i[d[6]][7]
1085
1085
1086 def chainlen(self, rev):
1086 def chainlen(self, rev):
1087 return self._chaininfo(rev)[0]
1087 return self._chaininfo(rev)[0]
1088
1088
1089 def _chaininfo(self, rev):
1089 def _chaininfo(self, rev):
1090 chaininfocache = self._chaininfocache
1090 chaininfocache = self._chaininfocache
1091 if rev in chaininfocache:
1091 if rev in chaininfocache:
1092 return chaininfocache[rev]
1092 return chaininfocache[rev]
1093 index = self.index
1093 index = self.index
1094 generaldelta = self._generaldelta
1094 generaldelta = self._generaldelta
1095 iterrev = rev
1095 iterrev = rev
1096 e = index[iterrev]
1096 e = index[iterrev]
1097 clen = 0
1097 clen = 0
1098 compresseddeltalen = 0
1098 compresseddeltalen = 0
1099 while iterrev != e[3]:
1099 while iterrev != e[3]:
1100 clen += 1
1100 clen += 1
1101 compresseddeltalen += e[1]
1101 compresseddeltalen += e[1]
1102 if generaldelta:
1102 if generaldelta:
1103 iterrev = e[3]
1103 iterrev = e[3]
1104 else:
1104 else:
1105 iterrev -= 1
1105 iterrev -= 1
1106 if iterrev in chaininfocache:
1106 if iterrev in chaininfocache:
1107 t = chaininfocache[iterrev]
1107 t = chaininfocache[iterrev]
1108 clen += t[0]
1108 clen += t[0]
1109 compresseddeltalen += t[1]
1109 compresseddeltalen += t[1]
1110 break
1110 break
1111 e = index[iterrev]
1111 e = index[iterrev]
1112 else:
1112 else:
1113 # Add text length of base since decompressing that also takes
1113 # Add text length of base since decompressing that also takes
1114 # work. For cache hits the length is already included.
1114 # work. For cache hits the length is already included.
1115 compresseddeltalen += e[1]
1115 compresseddeltalen += e[1]
1116 r = (clen, compresseddeltalen)
1116 r = (clen, compresseddeltalen)
1117 chaininfocache[rev] = r
1117 chaininfocache[rev] = r
1118 return r
1118 return r
1119
1119
1120 def _deltachain(self, rev, stoprev=None):
1120 def _deltachain(self, rev, stoprev=None):
1121 """Obtain the delta chain for a revision.
1121 """Obtain the delta chain for a revision.
1122
1122
1123 ``stoprev`` specifies a revision to stop at. If not specified, we
1123 ``stoprev`` specifies a revision to stop at. If not specified, we
1124 stop at the base of the chain.
1124 stop at the base of the chain.
1125
1125
1126 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1126 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1127 revs in ascending order and ``stopped`` is a bool indicating whether
1127 revs in ascending order and ``stopped`` is a bool indicating whether
1128 ``stoprev`` was hit.
1128 ``stoprev`` was hit.
1129 """
1129 """
1130 # Try C implementation.
1130 # Try C implementation.
1131 try:
1131 try:
1132 return self.index.deltachain(rev, stoprev, self._generaldelta)
1132 return self.index.deltachain(rev, stoprev, self._generaldelta)
1133 except AttributeError:
1133 except AttributeError:
1134 pass
1134 pass
1135
1135
1136 chain = []
1136 chain = []
1137
1137
1138 # Alias to prevent attribute lookup in tight loop.
1138 # Alias to prevent attribute lookup in tight loop.
1139 index = self.index
1139 index = self.index
1140 generaldelta = self._generaldelta
1140 generaldelta = self._generaldelta
1141
1141
1142 iterrev = rev
1142 iterrev = rev
1143 e = index[iterrev]
1143 e = index[iterrev]
1144 while iterrev != e[3] and iterrev != stoprev:
1144 while iterrev != e[3] and iterrev != stoprev:
1145 chain.append(iterrev)
1145 chain.append(iterrev)
1146 if generaldelta:
1146 if generaldelta:
1147 iterrev = e[3]
1147 iterrev = e[3]
1148 else:
1148 else:
1149 iterrev -= 1
1149 iterrev -= 1
1150 e = index[iterrev]
1150 e = index[iterrev]
1151
1151
1152 if iterrev == stoprev:
1152 if iterrev == stoprev:
1153 stopped = True
1153 stopped = True
1154 else:
1154 else:
1155 chain.append(iterrev)
1155 chain.append(iterrev)
1156 stopped = False
1156 stopped = False
1157
1157
1158 chain.reverse()
1158 chain.reverse()
1159 return chain, stopped
1159 return chain, stopped
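# Usage sketch (illustrative only; this is roughly what the read path does
# elsewhere with mdiff): ``chain, stopped = self._deltachain(rev)`` yields
# the full-snapshot base first, and conceptually the revision text is that
# base chunk with each later chunk in ``chain`` applied as a delta on top.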
1160
1160
1161 def ancestors(self, revs, stoprev=0, inclusive=False):
1161 def ancestors(self, revs, stoprev=0, inclusive=False):
1162 """Generate the ancestors of 'revs' in reverse revision order.
1162 """Generate the ancestors of 'revs' in reverse revision order.
1163 Does not generate revs lower than stoprev.
1163 Does not generate revs lower than stoprev.
1164
1164
1165 See the documentation for ancestor.lazyancestors for more details."""
1165 See the documentation for ancestor.lazyancestors for more details."""
1166
1166
1167 # first, make sure start revisions aren't filtered
1167 # first, make sure start revisions aren't filtered
1168 revs = list(revs)
1168 revs = list(revs)
1169 checkrev = self.node
1169 checkrev = self.node
1170 for r in revs:
1170 for r in revs:
1171 checkrev(r)
1171 checkrev(r)
1172 # and we're sure ancestors aren't filtered as well
1172 # and we're sure ancestors aren't filtered as well
1173
1173
1174 if rustancestor is not None and self.index.rust_ext_compat:
1174 if rustancestor is not None and self.index.rust_ext_compat:
1175 lazyancestors = rustancestor.LazyAncestors
1175 lazyancestors = rustancestor.LazyAncestors
1176 arg = self.index
1176 arg = self.index
1177 else:
1177 else:
1178 lazyancestors = ancestor.lazyancestors
1178 lazyancestors = ancestor.lazyancestors
1179 arg = self._uncheckedparentrevs
1179 arg = self._uncheckedparentrevs
1180 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1180 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
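# Usage sketch (``some_rev`` is a placeholder): ``for r in
# self.ancestors([some_rev], inclusive=True)`` walks that revision and
# everything below it in reverse revision order; with the default
# ``inclusive=False`` the starting revs themselves are not yielded.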
1181
1181
1182 def descendants(self, revs):
1182 def descendants(self, revs):
1183 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1183 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1184
1184
1185 def findcommonmissing(self, common=None, heads=None):
1185 def findcommonmissing(self, common=None, heads=None):
1186 """Return a tuple of the ancestors of common and the ancestors of heads
1186 """Return a tuple of the ancestors of common and the ancestors of heads
1187 that are not ancestors of common. In revset terminology, we return the
1187 that are not ancestors of common. In revset terminology, we return the
1188 tuple:
1188 tuple:
1189
1189
1190 ::common, (::heads) - (::common)
1190 ::common, (::heads) - (::common)
1191
1191
1192 The list is sorted by revision number, meaning it is
1192 The list is sorted by revision number, meaning it is
1193 topologically sorted.
1193 topologically sorted.
1194
1194
1195 'heads' and 'common' are both lists of node IDs. If heads is
1195 'heads' and 'common' are both lists of node IDs. If heads is
1196 not supplied, uses all of the revlog's heads. If common is not
1196 not supplied, uses all of the revlog's heads. If common is not
1197 supplied, uses nullid."""
1197 supplied, uses nullid."""
1198 if common is None:
1198 if common is None:
1199 common = [self.nullid]
1199 common = [self.nullid]
1200 if heads is None:
1200 if heads is None:
1201 heads = self.heads()
1201 heads = self.heads()
1202
1202
1203 common = [self.rev(n) for n in common]
1203 common = [self.rev(n) for n in common]
1204 heads = [self.rev(n) for n in heads]
1204 heads = [self.rev(n) for n in heads]
1205
1205
1206 # we want the ancestors, but inclusive
1206 # we want the ancestors, but inclusive
1207 class lazyset:
1207 class lazyset:
1208 def __init__(self, lazyvalues):
1208 def __init__(self, lazyvalues):
1209 self.addedvalues = set()
1209 self.addedvalues = set()
1210 self.lazyvalues = lazyvalues
1210 self.lazyvalues = lazyvalues
1211
1211
1212 def __contains__(self, value):
1212 def __contains__(self, value):
1213 return value in self.addedvalues or value in self.lazyvalues
1213 return value in self.addedvalues or value in self.lazyvalues
1214
1214
1215 def __iter__(self):
1215 def __iter__(self):
1216 added = self.addedvalues
1216 added = self.addedvalues
1217 for r in added:
1217 for r in added:
1218 yield r
1218 yield r
1219 for r in self.lazyvalues:
1219 for r in self.lazyvalues:
1220 if r not in added:
1220 if r not in added:
1221 yield r
1221 yield r
1222
1222
1223 def add(self, value):
1223 def add(self, value):
1224 self.addedvalues.add(value)
1224 self.addedvalues.add(value)
1225
1225
1226 def update(self, values):
1226 def update(self, values):
1227 self.addedvalues.update(values)
1227 self.addedvalues.update(values)
1228
1228
1229 has = lazyset(self.ancestors(common))
1229 has = lazyset(self.ancestors(common))
1230 has.add(nullrev)
1230 has.add(nullrev)
1231 has.update(common)
1231 has.update(common)
1232
1232
1233 # take all ancestors from heads that aren't in has
1233 # take all ancestors from heads that aren't in has
1234 missing = set()
1234 missing = set()
1235 visit = collections.deque(r for r in heads if r not in has)
1235 visit = collections.deque(r for r in heads if r not in has)
1236 while visit:
1236 while visit:
1237 r = visit.popleft()
1237 r = visit.popleft()
1238 if r in missing:
1238 if r in missing:
1239 continue
1239 continue
1240 else:
1240 else:
1241 missing.add(r)
1241 missing.add(r)
1242 for p in self.parentrevs(r):
1242 for p in self.parentrevs(r):
1243 if p not in has:
1243 if p not in has:
1244 visit.append(p)
1244 visit.append(p)
1245 missing = list(missing)
1245 missing = list(missing)
1246 missing.sort()
1246 missing.sort()
1247 return has, [self.node(miss) for miss in missing]
1247 return has, [self.node(miss) for miss in missing]
1248
1248
1249 def incrementalmissingrevs(self, common=None):
1249 def incrementalmissingrevs(self, common=None):
1250 """Return an object that can be used to incrementally compute the
1250 """Return an object that can be used to incrementally compute the
1251 revision numbers of the ancestors of arbitrary sets that are not
1251 revision numbers of the ancestors of arbitrary sets that are not
1252 ancestors of common. This is an ancestor.incrementalmissingancestors
1252 ancestors of common. This is an ancestor.incrementalmissingancestors
1253 object.
1253 object.
1254
1254
1255 'common' is a list of revision numbers. If common is not supplied, uses
1255 'common' is a list of revision numbers. If common is not supplied, uses
1256 nullrev.
1256 nullrev.
1257 """
1257 """
1258 if common is None:
1258 if common is None:
1259 common = [nullrev]
1259 common = [nullrev]
1260
1260
1261 if rustancestor is not None and self.index.rust_ext_compat:
1261 if rustancestor is not None and self.index.rust_ext_compat:
1262 return rustancestor.MissingAncestors(self.index, common)
1262 return rustancestor.MissingAncestors(self.index, common)
1263 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1263 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1264
1264
1265 def findmissingrevs(self, common=None, heads=None):
1265 def findmissingrevs(self, common=None, heads=None):
1266 """Return the revision numbers of the ancestors of heads that
1266 """Return the revision numbers of the ancestors of heads that
1267 are not ancestors of common.
1267 are not ancestors of common.
1268
1268
1269 More specifically, return a list of revision numbers corresponding to
1269 More specifically, return a list of revision numbers corresponding to
1270 nodes N such that every N satisfies the following constraints:
1270 nodes N such that every N satisfies the following constraints:
1271
1271
1272 1. N is an ancestor of some node in 'heads'
1272 1. N is an ancestor of some node in 'heads'
1273 2. N is not an ancestor of any node in 'common'
1273 2. N is not an ancestor of any node in 'common'
1274
1274
1275 The list is sorted by revision number, meaning it is
1275 The list is sorted by revision number, meaning it is
1276 topologically sorted.
1276 topologically sorted.
1277
1277
1278 'heads' and 'common' are both lists of revision numbers. If heads is
1278 'heads' and 'common' are both lists of revision numbers. If heads is
1279 not supplied, uses all of the revlog's heads. If common is not
1279 not supplied, uses all of the revlog's heads. If common is not
1280 supplied, uses nullid."""
1280 supplied, uses nullid."""
1281 if common is None:
1281 if common is None:
1282 common = [nullrev]
1282 common = [nullrev]
1283 if heads is None:
1283 if heads is None:
1284 heads = self.headrevs()
1284 heads = self.headrevs()
1285
1285
1286 inc = self.incrementalmissingrevs(common=common)
1286 inc = self.incrementalmissingrevs(common=common)
1287 return inc.missingancestors(heads)
1287 return inc.missingancestors(heads)
1288
1288
1289 def findmissing(self, common=None, heads=None):
1289 def findmissing(self, common=None, heads=None):
1290 """Return the ancestors of heads that are not ancestors of common.
1290 """Return the ancestors of heads that are not ancestors of common.
1291
1291
1292 More specifically, return a list of nodes N such that every N
1292 More specifically, return a list of nodes N such that every N
1293 satisfies the following constraints:
1293 satisfies the following constraints:
1294
1294
1295 1. N is an ancestor of some node in 'heads'
1295 1. N is an ancestor of some node in 'heads'
1296 2. N is not an ancestor of any node in 'common'
1296 2. N is not an ancestor of any node in 'common'
1297
1297
1298 The list is sorted by revision number, meaning it is
1298 The list is sorted by revision number, meaning it is
1299 topologically sorted.
1299 topologically sorted.
1300
1300
1301 'heads' and 'common' are both lists of node IDs. If heads is
1301 'heads' and 'common' are both lists of node IDs. If heads is
1302 not supplied, uses all of the revlog's heads. If common is not
1302 not supplied, uses all of the revlog's heads. If common is not
1303 supplied, uses nullid."""
1303 supplied, uses nullid."""
1304 if common is None:
1304 if common is None:
1305 common = [self.nullid]
1305 common = [self.nullid]
1306 if heads is None:
1306 if heads is None:
1307 heads = self.heads()
1307 heads = self.heads()
1308
1308
1309 common = [self.rev(n) for n in common]
1309 common = [self.rev(n) for n in common]
1310 heads = [self.rev(n) for n in heads]
1310 heads = [self.rev(n) for n in heads]
1311
1311
1312 inc = self.incrementalmissingrevs(common=common)
1312 inc = self.incrementalmissingrevs(common=common)
1313 return [self.node(r) for r in inc.missingancestors(heads)]
1313 return [self.node(r) for r in inc.missingancestors(heads)]
1314
1314
1315 def nodesbetween(self, roots=None, heads=None):
1315 def nodesbetween(self, roots=None, heads=None):
1316 """Return a topological path from 'roots' to 'heads'.
1316 """Return a topological path from 'roots' to 'heads'.
1317
1317
1318 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1318 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1319 topologically sorted list of all nodes N that satisfy both of
1319 topologically sorted list of all nodes N that satisfy both of
1320 these constraints:
1320 these constraints:
1321
1321
1322 1. N is a descendant of some node in 'roots'
1322 1. N is a descendant of some node in 'roots'
1323 2. N is an ancestor of some node in 'heads'
1323 2. N is an ancestor of some node in 'heads'
1324
1324
1325 Every node is considered to be both a descendant and an ancestor
1325 Every node is considered to be both a descendant and an ancestor
1326 of itself, so every reachable node in 'roots' and 'heads' will be
1326 of itself, so every reachable node in 'roots' and 'heads' will be
1327 included in 'nodes'.
1327 included in 'nodes'.
1328
1328
1329 'outroots' is the list of reachable nodes in 'roots', i.e., the
1329 'outroots' is the list of reachable nodes in 'roots', i.e., the
1330 subset of 'roots' that is returned in 'nodes'. Likewise,
1330 subset of 'roots' that is returned in 'nodes'. Likewise,
1331 'outheads' is the subset of 'heads' that is also in 'nodes'.
1331 'outheads' is the subset of 'heads' that is also in 'nodes'.
1332
1332
1333 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1333 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1334 unspecified, uses nullid as the only root. If 'heads' is
1334 unspecified, uses nullid as the only root. If 'heads' is
1335 unspecified, uses list of all of the revlog's heads."""
1335 unspecified, uses list of all of the revlog's heads."""
1336 nonodes = ([], [], [])
1336 nonodes = ([], [], [])
1337 if roots is not None:
1337 if roots is not None:
1338 roots = list(roots)
1338 roots = list(roots)
1339 if not roots:
1339 if not roots:
1340 return nonodes
1340 return nonodes
1341 lowestrev = min([self.rev(n) for n in roots])
1341 lowestrev = min([self.rev(n) for n in roots])
1342 else:
1342 else:
1343 roots = [self.nullid] # Everybody's a descendant of nullid
1343 roots = [self.nullid] # Everybody's a descendant of nullid
1344 lowestrev = nullrev
1344 lowestrev = nullrev
1345 if (lowestrev == nullrev) and (heads is None):
1345 if (lowestrev == nullrev) and (heads is None):
1346 # We want _all_ the nodes!
1346 # We want _all_ the nodes!
1347 return (
1347 return (
1348 [self.node(r) for r in self],
1348 [self.node(r) for r in self],
1349 [self.nullid],
1349 [self.nullid],
1350 list(self.heads()),
1350 list(self.heads()),
1351 )
1351 )
1352 if heads is None:
1352 if heads is None:
1353 # All nodes are ancestors, so the latest ancestor is the last
1353 # All nodes are ancestors, so the latest ancestor is the last
1354 # node.
1354 # node.
1355 highestrev = len(self) - 1
1355 highestrev = len(self) - 1
1356 # Set ancestors to None to signal that every node is an ancestor.
1356 # Set ancestors to None to signal that every node is an ancestor.
1357 ancestors = None
1357 ancestors = None
1358 # Set heads to an empty dictionary for later discovery of heads
1358 # Set heads to an empty dictionary for later discovery of heads
1359 heads = {}
1359 heads = {}
1360 else:
1360 else:
1361 heads = list(heads)
1361 heads = list(heads)
1362 if not heads:
1362 if not heads:
1363 return nonodes
1363 return nonodes
1364 ancestors = set()
1364 ancestors = set()
1365 # Turn heads into a dictionary so we can remove 'fake' heads.
1365 # Turn heads into a dictionary so we can remove 'fake' heads.
1366 # Also, later we will be using it to filter out the heads we can't
1366 # Also, later we will be using it to filter out the heads we can't
1367 # find from roots.
1367 # find from roots.
1368 heads = dict.fromkeys(heads, False)
1368 heads = dict.fromkeys(heads, False)
1369 # Start at the top and keep marking parents until we're done.
1369 # Start at the top and keep marking parents until we're done.
1370 nodestotag = set(heads)
1370 nodestotag = set(heads)
1371 # Remember where the top was so we can use it as a limit later.
1371 # Remember where the top was so we can use it as a limit later.
1372 highestrev = max([self.rev(n) for n in nodestotag])
1372 highestrev = max([self.rev(n) for n in nodestotag])
1373 while nodestotag:
1373 while nodestotag:
1374 # grab a node to tag
1374 # grab a node to tag
1375 n = nodestotag.pop()
1375 n = nodestotag.pop()
1376 # Never tag nullid
1376 # Never tag nullid
1377 if n == self.nullid:
1377 if n == self.nullid:
1378 continue
1378 continue
1379 # A node's revision number represents its place in a
1379 # A node's revision number represents its place in a
1380 # topologically sorted list of nodes.
1380 # topologically sorted list of nodes.
1381 r = self.rev(n)
1381 r = self.rev(n)
1382 if r >= lowestrev:
1382 if r >= lowestrev:
1383 if n not in ancestors:
1383 if n not in ancestors:
1384 # If we are possibly a descendant of one of the roots
1384 # If we are possibly a descendant of one of the roots
1385 # and we haven't already been marked as an ancestor
1385 # and we haven't already been marked as an ancestor
1386 ancestors.add(n) # Mark as ancestor
1386 ancestors.add(n) # Mark as ancestor
1387 # Add non-nullid parents to list of nodes to tag.
1387 # Add non-nullid parents to list of nodes to tag.
1388 nodestotag.update(
1388 nodestotag.update(
1389 [p for p in self.parents(n) if p != self.nullid]
1389 [p for p in self.parents(n) if p != self.nullid]
1390 )
1390 )
1391 elif n in heads: # We've seen it before, is it a fake head?
1391 elif n in heads: # We've seen it before, is it a fake head?
1392 # So it is, real heads should not be the ancestors of
1392 # So it is, real heads should not be the ancestors of
1393 # any other heads.
1393 # any other heads.
1394 heads.pop(n)
1394 heads.pop(n)
1395 if not ancestors:
1395 if not ancestors:
1396 return nonodes
1396 return nonodes
1397 # Now that we have our set of ancestors, we want to remove any
1397 # Now that we have our set of ancestors, we want to remove any
1398 # roots that are not ancestors.
1398 # roots that are not ancestors.
1399
1399
1400 # If one of the roots was nullid, everything is included anyway.
1400 # If one of the roots was nullid, everything is included anyway.
1401 if lowestrev > nullrev:
1401 if lowestrev > nullrev:
1402 # But, since we weren't, let's recompute the lowest rev to not
1402 # But, since we weren't, let's recompute the lowest rev to not
1403 # include roots that aren't ancestors.
1403 # include roots that aren't ancestors.
1404
1404
1405 # Filter out roots that aren't ancestors of heads
1405 # Filter out roots that aren't ancestors of heads
1406 roots = [root for root in roots if root in ancestors]
1406 roots = [root for root in roots if root in ancestors]
1407 # Recompute the lowest revision
1407 # Recompute the lowest revision
1408 if roots:
1408 if roots:
1409 lowestrev = min([self.rev(root) for root in roots])
1409 lowestrev = min([self.rev(root) for root in roots])
1410 else:
1410 else:
1411 # No more roots? Return empty list
1411 # No more roots? Return empty list
1412 return nonodes
1412 return nonodes
1413 else:
1413 else:
1414 # We are descending from nullid, and don't need to care about
1414 # We are descending from nullid, and don't need to care about
1415 # any other roots.
1415 # any other roots.
1416 lowestrev = nullrev
1416 lowestrev = nullrev
1417 roots = [self.nullid]
1417 roots = [self.nullid]
1418 # Transform our roots list into a set.
1418 # Transform our roots list into a set.
1419 descendants = set(roots)
1419 descendants = set(roots)
1420 # Also, keep the original roots so we can filter out roots that aren't
1420 # Also, keep the original roots so we can filter out roots that aren't
1421 # 'real' roots (i.e. are descended from other roots).
1421 # 'real' roots (i.e. are descended from other roots).
1422 roots = descendants.copy()
1422 roots = descendants.copy()
1423 # Our topologically sorted list of output nodes.
1423 # Our topologically sorted list of output nodes.
1424 orderedout = []
1424 orderedout = []
1425 # Don't start at nullid since we don't want nullid in our output list,
1425 # Don't start at nullid since we don't want nullid in our output list,
1426 # and if nullid shows up in descendants, empty parents will look like
1426 # and if nullid shows up in descendants, empty parents will look like
1427 # they're descendants.
1427 # they're descendants.
1428 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1428 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1429 n = self.node(r)
1429 n = self.node(r)
1430 isdescendant = False
1430 isdescendant = False
1431 if lowestrev == nullrev: # Everybody is a descendant of nullid
1431 if lowestrev == nullrev: # Everybody is a descendant of nullid
1432 isdescendant = True
1432 isdescendant = True
1433 elif n in descendants:
1433 elif n in descendants:
1434 # n is already a descendant
1434 # n is already a descendant
1435 isdescendant = True
1435 isdescendant = True
1436 # This check only needs to be done here because all the roots
1436 # This check only needs to be done here because all the roots
1437 # will start being marked as descendants before the loop.
1437 # will start being marked as descendants before the loop.
1438 if n in roots:
1438 if n in roots:
1439 # If n was a root, check if it's a 'real' root.
1439 # If n was a root, check if it's a 'real' root.
1440 p = tuple(self.parents(n))
1440 p = tuple(self.parents(n))
1441 # If any of its parents are descendants, it's not a root.
1441 # If any of its parents are descendants, it's not a root.
1442 if (p[0] in descendants) or (p[1] in descendants):
1442 if (p[0] in descendants) or (p[1] in descendants):
1443 roots.remove(n)
1443 roots.remove(n)
1444 else:
1444 else:
1445 p = tuple(self.parents(n))
1445 p = tuple(self.parents(n))
1446 # A node is a descendant if either of its parents is a
1446 # A node is a descendant if either of its parents is a
1447 # descendant. (We seeded the descendants set with the roots
1447 # descendant. (We seeded the descendants set with the roots
1448 # up there, remember?)
1448 # up there, remember?)
1449 if (p[0] in descendants) or (p[1] in descendants):
1449 if (p[0] in descendants) or (p[1] in descendants):
1450 descendants.add(n)
1450 descendants.add(n)
1451 isdescendant = True
1451 isdescendant = True
1452 if isdescendant and ((ancestors is None) or (n in ancestors)):
1452 if isdescendant and ((ancestors is None) or (n in ancestors)):
1453 # Only include nodes that are both descendants and ancestors.
1453 # Only include nodes that are both descendants and ancestors.
1454 orderedout.append(n)
1454 orderedout.append(n)
1455 if (ancestors is not None) and (n in heads):
1455 if (ancestors is not None) and (n in heads):
1456 # We're trying to figure out which heads are reachable
1456 # We're trying to figure out which heads are reachable
1457 # from roots.
1457 # from roots.
1458 # Mark this head as having been reached
1458 # Mark this head as having been reached
1459 heads[n] = True
1459 heads[n] = True
1460 elif ancestors is None:
1460 elif ancestors is None:
1461 # Otherwise, we're trying to discover the heads.
1461 # Otherwise, we're trying to discover the heads.
1462 # Assume this is a head because if it isn't, the next step
1462 # Assume this is a head because if it isn't, the next step
1463 # will eventually remove it.
1463 # will eventually remove it.
1464 heads[n] = True
1464 heads[n] = True
1465 # But, obviously its parents aren't.
1465 # But, obviously its parents aren't.
1466 for p in self.parents(n):
1466 for p in self.parents(n):
1467 heads.pop(p, None)
1467 heads.pop(p, None)
1468 heads = [head for head, flag in heads.items() if flag]
1468 heads = [head for head, flag in heads.items() if flag]
1469 roots = list(roots)
1469 roots = list(roots)
1470 assert orderedout
1470 assert orderedout
1471 assert roots
1471 assert roots
1472 assert heads
1472 assert heads
1473 return (orderedout, roots, heads)
1473 return (orderedout, roots, heads)
1474
1474
1475 def headrevs(self, revs=None):
1475 def headrevs(self, revs=None):
1476 if revs is None:
1476 if revs is None:
1477 try:
1477 try:
1478 return self.index.headrevs()
1478 return self.index.headrevs()
1479 except AttributeError:
1479 except AttributeError:
1480 return self._headrevs()
1480 return self._headrevs()
1481 if rustdagop is not None and self.index.rust_ext_compat:
1481 if rustdagop is not None and self.index.rust_ext_compat:
1482 return rustdagop.headrevs(self.index, revs)
1482 return rustdagop.headrevs(self.index, revs)
1483 return dagop.headrevs(revs, self._uncheckedparentrevs)
1483 return dagop.headrevs(revs, self._uncheckedparentrevs)
1484
1484
1485 def computephases(self, roots):
1485 def computephases(self, roots):
1486 return self.index.computephasesmapsets(roots)
1486 return self.index.computephasesmapsets(roots)
1487
1487
1488 def _headrevs(self):
1488 def _headrevs(self):
1489 count = len(self)
1489 count = len(self)
1490 if not count:
1490 if not count:
1491 return [nullrev]
1491 return [nullrev]
1492 # we won't iterate over filtered revs, so nobody is a head at start
1492 # we won't iterate over filtered revs, so nobody is a head at start
1493 ishead = [0] * (count + 1)
1493 ishead = [0] * (count + 1)
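# (sketch of why the array has one extra slot: parent fields can be
# nullrev == -1, so the writes below land in ishead[-1], i.e. the spare
# last slot, instead of clobbering the flag of the real tip revision)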
1494 index = self.index
1494 index = self.index
1495 for r in self:
1495 for r in self:
1496 ishead[r] = 1 # I may be a head
1496 ishead[r] = 1 # I may be a head
1497 e = index[r]
1497 e = index[r]
1498 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1498 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1499 return [r for r, val in enumerate(ishead) if val]
1499 return [r for r, val in enumerate(ishead) if val]
1500
1500
1501 def heads(self, start=None, stop=None):
1501 def heads(self, start=None, stop=None):
1502 """return the list of all nodes that have no children
1502 """return the list of all nodes that have no children
1503
1503
1504 if start is specified, only heads that are descendants of
1504 if start is specified, only heads that are descendants of
1505 start will be returned
1505 start will be returned
1506 if stop is specified, it will consider all the revs from stop
1506 if stop is specified, it will consider all the revs from stop
1507 as if they had no children
1507 as if they had no children
1508 """
1508 """
1509 if start is None and stop is None:
1509 if start is None and stop is None:
1510 if not len(self):
1510 if not len(self):
1511 return [self.nullid]
1511 return [self.nullid]
1512 return [self.node(r) for r in self.headrevs()]
1512 return [self.node(r) for r in self.headrevs()]
1513
1513
1514 if start is None:
1514 if start is None:
1515 start = nullrev
1515 start = nullrev
1516 else:
1516 else:
1517 start = self.rev(start)
1517 start = self.rev(start)
1518
1518
1519 stoprevs = {self.rev(n) for n in stop or []}
1519 stoprevs = {self.rev(n) for n in stop or []}
1520
1520
1521 revs = dagop.headrevssubset(
1521 revs = dagop.headrevssubset(
1522 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1522 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1523 )
1523 )
1524
1524
1525 return [self.node(rev) for rev in revs]
1525 return [self.node(rev) for rev in revs]
1526
1526
1527 def children(self, node):
1527 def children(self, node):
1528 """find the children of a given node"""
1528 """find the children of a given node"""
1529 c = []
1529 c = []
1530 p = self.rev(node)
1530 p = self.rev(node)
1531 for r in self.revs(start=p + 1):
1531 for r in self.revs(start=p + 1):
1532 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1532 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1533 if prevs:
1533 if prevs:
1534 for pr in prevs:
1534 for pr in prevs:
1535 if pr == p:
1535 if pr == p:
1536 c.append(self.node(r))
1536 c.append(self.node(r))
1537 elif p == nullrev:
1537 elif p == nullrev:
1538 c.append(self.node(r))
1538 c.append(self.node(r))
1539 return c
1539 return c
1540
1540
1541 def commonancestorsheads(self, a, b):
1541 def commonancestorsheads(self, a, b):
1542 """calculate all the heads of the common ancestors of nodes a and b"""
1542 """calculate all the heads of the common ancestors of nodes a and b"""
1543 a, b = self.rev(a), self.rev(b)
1543 a, b = self.rev(a), self.rev(b)
1544 ancs = self._commonancestorsheads(a, b)
1544 ancs = self._commonancestorsheads(a, b)
1545 return pycompat.maplist(self.node, ancs)
1545 return pycompat.maplist(self.node, ancs)
1546
1546
1547 def _commonancestorsheads(self, *revs):
1547 def _commonancestorsheads(self, *revs):
1548 """calculate all the heads of the common ancestors of revs"""
1548 """calculate all the heads of the common ancestors of revs"""
1549 try:
1549 try:
1550 ancs = self.index.commonancestorsheads(*revs)
1550 ancs = self.index.commonancestorsheads(*revs)
1551 except (AttributeError, OverflowError): # C implementation failed
1551 except (AttributeError, OverflowError): # C implementation failed
1552 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1552 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1553 return ancs
1553 return ancs
1554
1554
1555 def isancestor(self, a, b):
1555 def isancestor(self, a, b):
1556 """return True if node a is an ancestor of node b
1556 """return True if node a is an ancestor of node b
1557
1557
1558 A revision is considered an ancestor of itself."""
1558 A revision is considered an ancestor of itself."""
1559 a, b = self.rev(a), self.rev(b)
1559 a, b = self.rev(a), self.rev(b)
1560 return self.isancestorrev(a, b)
1560 return self.isancestorrev(a, b)
1561
1561
1562 def isancestorrev(self, a, b):
1562 def isancestorrev(self, a, b):
1563 """return True if revision a is an ancestor of revision b
1563 """return True if revision a is an ancestor of revision b
1564
1564
1565 A revision is considered an ancestor of itself.
1565 A revision is considered an ancestor of itself.
1566
1566
1567 The implementation of this is trivial but the use of
1567 The implementation of this is trivial but the use of
1568 reachableroots is not."""
1568 reachableroots is not."""
1569 if a == nullrev:
1569 if a == nullrev:
1570 return True
1570 return True
1571 elif a == b:
1571 elif a == b:
1572 return True
1572 return True
1573 elif a > b:
1573 elif a > b:
1574 return False
1574 return False
1575 return bool(self.reachableroots(a, [b], [a], includepath=False))
1575 return bool(self.reachableroots(a, [b], [a], includepath=False))
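# The ``a > b`` shortcut above relies on revlogs being append-only:
# parents always have smaller revision numbers than their children, so an
# ancestor can never carry a higher rev than its descendant.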
1576
1576
1577 def reachableroots(self, minroot, heads, roots, includepath=False):
1577 def reachableroots(self, minroot, heads, roots, includepath=False):
1578 """return (heads(::(<roots> and <roots>::<heads>)))
1578 """return (heads(::(<roots> and <roots>::<heads>)))
1579
1579
1580 If includepath is True, return (<roots>::<heads>)."""
1580 If includepath is True, return (<roots>::<heads>)."""
1581 try:
1581 try:
1582 return self.index.reachableroots2(
1582 return self.index.reachableroots2(
1583 minroot, heads, roots, includepath
1583 minroot, heads, roots, includepath
1584 )
1584 )
1585 except AttributeError:
1585 except AttributeError:
1586 return dagop._reachablerootspure(
1586 return dagop._reachablerootspure(
1587 self.parentrevs, minroot, roots, heads, includepath
1587 self.parentrevs, minroot, roots, heads, includepath
1588 )
1588 )
1589
1589
1590 def ancestor(self, a, b):
1590 def ancestor(self, a, b):
1591 """calculate the "best" common ancestor of nodes a and b"""
1591 """calculate the "best" common ancestor of nodes a and b"""
1592
1592
1593 a, b = self.rev(a), self.rev(b)
1593 a, b = self.rev(a), self.rev(b)
1594 try:
1594 try:
1595 ancs = self.index.ancestors(a, b)
1595 ancs = self.index.ancestors(a, b)
1596 except (AttributeError, OverflowError):
1596 except (AttributeError, OverflowError):
1597 ancs = ancestor.ancestors(self.parentrevs, a, b)
1597 ancs = ancestor.ancestors(self.parentrevs, a, b)
1598 if ancs:
1598 if ancs:
1599 # choose a consistent winner when there's a tie
1599 # choose a consistent winner when there's a tie
1600 return min(map(self.node, ancs))
1600 return min(map(self.node, ancs))
1601 return self.nullid
1601 return self.nullid
1602
1602
1603 def _match(self, id):
1603 def _match(self, id):
1604 if isinstance(id, int):
1604 if isinstance(id, int):
1605 # rev
1605 # rev
1606 return self.node(id)
1606 return self.node(id)
1607 if len(id) == self.nodeconstants.nodelen:
1607 if len(id) == self.nodeconstants.nodelen:
1608 # possibly a binary node
1608 # possibly a binary node
1609 # odds of a binary node being all hex in ASCII are 1 in 10**25
1609 # odds of a binary node being all hex in ASCII are 1 in 10**25
1610 try:
1610 try:
1611 node = id
1611 node = id
1612 self.rev(node) # quick search the index
1612 self.rev(node) # quick search the index
1613 return node
1613 return node
1614 except error.LookupError:
1614 except error.LookupError:
1615 pass # may be partial hex id
1615 pass # may be partial hex id
1616 try:
1616 try:
1617 # str(rev)
1617 # str(rev)
1618 rev = int(id)
1618 rev = int(id)
1619 if b"%d" % rev != id:
1619 if b"%d" % rev != id:
1620 raise ValueError
1620 raise ValueError
1621 if rev < 0:
1621 if rev < 0:
1622 rev = len(self) + rev
1622 rev = len(self) + rev
1623 if rev < 0 or rev >= len(self):
1623 if rev < 0 or rev >= len(self):
1624 raise ValueError
1624 raise ValueError
1625 return self.node(rev)
1625 return self.node(rev)
1626 except (ValueError, OverflowError):
1626 except (ValueError, OverflowError):
1627 pass
1627 pass
1628 if len(id) == 2 * self.nodeconstants.nodelen:
1628 if len(id) == 2 * self.nodeconstants.nodelen:
1629 try:
1629 try:
1630 # a full hex nodeid?
1630 # a full hex nodeid?
1631 node = bin(id)
1631 node = bin(id)
1632 self.rev(node)
1632 self.rev(node)
1633 return node
1633 return node
1634 except (binascii.Error, error.LookupError):
1634 except (binascii.Error, error.LookupError):
1635 pass
1635 pass
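# Summary of the lookup order tried above (descriptive, mirrors the code):
# an int is taken as a revision number, a nodelen-sized string as a binary
# node, a decimal bytestring as str(rev), and a 2*nodelen hex string as a
# full nodeid (20 and 40 bytes respectively for SHA-1 repositories);
# anything else falls through to None so _partialmatch() can try prefixes.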
1636
1636
1637 def _partialmatch(self, id):
1637 def _partialmatch(self, id):
1638 # we don't care about wdirfilenodeids as they should always be full hashes
1638 # we don't care about wdirfilenodeids as they should always be full hashes
1639 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1639 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1640 ambiguous = False
1640 ambiguous = False
1641 try:
1641 try:
1642 partial = self.index.partialmatch(id)
1642 partial = self.index.partialmatch(id)
1643 if partial and self.hasnode(partial):
1643 if partial and self.hasnode(partial):
1644 if maybewdir:
1644 if maybewdir:
1645 # single 'ff...' match in radix tree, ambiguous with wdir
1645 # single 'ff...' match in radix tree, ambiguous with wdir
1646 ambiguous = True
1646 ambiguous = True
1647 else:
1647 else:
1648 return partial
1648 return partial
1649 elif maybewdir:
1649 elif maybewdir:
1650 # no 'ff...' match in radix tree, wdir identified
1650 # no 'ff...' match in radix tree, wdir identified
1651 raise error.WdirUnsupported
1651 raise error.WdirUnsupported
1652 else:
1652 else:
1653 return None
1653 return None
1654 except error.RevlogError:
1654 except error.RevlogError:
1655 # parsers.c radix tree lookup gave multiple matches
1655 # parsers.c radix tree lookup gave multiple matches
1656 # fast path: for unfiltered changelog, radix tree is accurate
1656 # fast path: for unfiltered changelog, radix tree is accurate
1657 if not getattr(self, 'filteredrevs', None):
1657 if not getattr(self, 'filteredrevs', None):
1658 ambiguous = True
1658 ambiguous = True
1659 # fall through to slow path that filters hidden revisions
1659 # fall through to slow path that filters hidden revisions
1660 except (AttributeError, ValueError):
1660 except (AttributeError, ValueError):
1661 # we are pure python, or key is not hex
1661 # we are pure python, or key is not hex
1662 pass
1662 pass
1663 if ambiguous:
1663 if ambiguous:
1664 raise error.AmbiguousPrefixLookupError(
1664 raise error.AmbiguousPrefixLookupError(
1665 id, self.display_id, _(b'ambiguous identifier')
1665 id, self.display_id, _(b'ambiguous identifier')
1666 )
1666 )
1667
1667
1668 if id in self._pcache:
1668 if id in self._pcache:
1669 return self._pcache[id]
1669 return self._pcache[id]
1670
1670
1671 if len(id) <= 40:
1671 if len(id) <= 40:
1672 # hex(node)[:...]
1672 # hex(node)[:...]
1673 l = len(id) // 2 * 2 # grab an even number of digits
1673 l = len(id) // 2 * 2 # grab an even number of digits
1674 try:
1674 try:
1675 # we're dropping the last digit, so let's check that it's hex,
1675 # we're dropping the last digit, so let's check that it's hex,
1676 # to avoid the expensive computation below if it's not
1676 # to avoid the expensive computation below if it's not
1677 if len(id) % 2 > 0:
1677 if len(id) % 2 > 0:
1678 if not (id[-1] in hexdigits):
1678 if not (id[-1] in hexdigits):
1679 return None
1679 return None
1680 prefix = bin(id[:l])
1680 prefix = bin(id[:l])
1681 except binascii.Error:
1681 except binascii.Error:
1682 pass
1682 pass
1683 else:
1683 else:
1684 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1684 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1685 nl = [
1685 nl = [
1686 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1686 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1687 ]
1687 ]
1688 if self.nodeconstants.nullhex.startswith(id):
1688 if self.nodeconstants.nullhex.startswith(id):
1689 nl.append(self.nullid)
1689 nl.append(self.nullid)
1690 if len(nl) > 0:
1690 if len(nl) > 0:
1691 if len(nl) == 1 and not maybewdir:
1691 if len(nl) == 1 and not maybewdir:
1692 self._pcache[id] = nl[0]
1692 self._pcache[id] = nl[0]
1693 return nl[0]
1693 return nl[0]
1694 raise error.AmbiguousPrefixLookupError(
1694 raise error.AmbiguousPrefixLookupError(
1695 id, self.display_id, _(b'ambiguous identifier')
1695 id, self.display_id, _(b'ambiguous identifier')
1696 )
1696 )
1697 if maybewdir:
1697 if maybewdir:
1698 raise error.WdirUnsupported
1698 raise error.WdirUnsupported
1699 return None
1699 return None
1700
1700
1701 def lookup(self, id):
1701 def lookup(self, id):
1702 """locate a node based on:
1702 """locate a node based on:
1703 - revision number or str(revision number)
1703 - revision number or str(revision number)
1704 - nodeid or subset of hex nodeid
1704 - nodeid or subset of hex nodeid
1705 """
1705 """
1706 n = self._match(id)
1706 n = self._match(id)
1707 if n is not None:
1707 if n is not None:
1708 return n
1708 return n
1709 n = self._partialmatch(id)
1709 n = self._partialmatch(id)
1710 if n:
1710 if n:
1711 return n
1711 return n
1712
1712
1713 raise error.LookupError(id, self.display_id, _(b'no match found'))
1713 raise error.LookupError(id, self.display_id, _(b'no match found'))
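# Usage sketch (values are illustrative): ``self.lookup(b'0')`` resolves
# revision 0, ``self.lookup(node)`` accepts a binary node, and
# ``self.lookup(hex(node)[:6])`` accepts an unambiguous hex prefix;
# ambiguous prefixes raise AmbiguousPrefixLookupError and unknown
# identifiers raise LookupError.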
1714
1714
1715 def shortest(self, node, minlength=1):
1715 def shortest(self, node, minlength=1):
1716 """Find the shortest unambiguous prefix that matches node."""
1716 """Find the shortest unambiguous prefix that matches node."""
1717
1717
1718 def isvalid(prefix):
1718 def isvalid(prefix):
1719 try:
1719 try:
1720 matchednode = self._partialmatch(prefix)
1720 matchednode = self._partialmatch(prefix)
1721 except error.AmbiguousPrefixLookupError:
1721 except error.AmbiguousPrefixLookupError:
1722 return False
1722 return False
1723 except error.WdirUnsupported:
1723 except error.WdirUnsupported:
1724 # single 'ff...' match
1724 # single 'ff...' match
1725 return True
1725 return True
1726 if matchednode is None:
1726 if matchednode is None:
1727 raise error.LookupError(node, self.display_id, _(b'no node'))
1727 raise error.LookupError(node, self.display_id, _(b'no node'))
1728 return True
1728 return True
1729
1729
1730 def maybewdir(prefix):
1730 def maybewdir(prefix):
1731 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1731 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1732
1732
1733 hexnode = hex(node)
1733 hexnode = hex(node)
1734
1734
1735 def disambiguate(hexnode, minlength):
1735 def disambiguate(hexnode, minlength):
1736 """Disambiguate against wdirid."""
1736 """Disambiguate against wdirid."""
1737 for length in range(minlength, len(hexnode) + 1):
1737 for length in range(minlength, len(hexnode) + 1):
1738 prefix = hexnode[:length]
1738 prefix = hexnode[:length]
1739 if not maybewdir(prefix):
1739 if not maybewdir(prefix):
1740 return prefix
1740 return prefix
1741
1741
1742 if not getattr(self, 'filteredrevs', None):
1742 if not getattr(self, 'filteredrevs', None):
1743 try:
1743 try:
1744 length = max(self.index.shortest(node), minlength)
1744 length = max(self.index.shortest(node), minlength)
1745 return disambiguate(hexnode, length)
1745 return disambiguate(hexnode, length)
1746 except error.RevlogError:
1746 except error.RevlogError:
1747 if node != self.nodeconstants.wdirid:
1747 if node != self.nodeconstants.wdirid:
1748 raise error.LookupError(
1748 raise error.LookupError(
1749 node, self.display_id, _(b'no node')
1749 node, self.display_id, _(b'no node')
1750 )
1750 )
1751 except AttributeError:
1751 except AttributeError:
1752 # Fall through to pure code
1752 # Fall through to pure code
1753 pass
1753 pass
1754
1754
1755 if node == self.nodeconstants.wdirid:
1755 if node == self.nodeconstants.wdirid:
1756 for length in range(minlength, len(hexnode) + 1):
1756 for length in range(minlength, len(hexnode) + 1):
1757 prefix = hexnode[:length]
1757 prefix = hexnode[:length]
1758 if isvalid(prefix):
1758 if isvalid(prefix):
1759 return prefix
1759 return prefix
1760
1760
1761 for length in range(minlength, len(hexnode) + 1):
1761 for length in range(minlength, len(hexnode) + 1):
1762 prefix = hexnode[:length]
1762 prefix = hexnode[:length]
1763 if isvalid(prefix):
1763 if isvalid(prefix):
1764 return disambiguate(hexnode, length)
1764 return disambiguate(hexnode, length)
1765
1765
1766 def cmp(self, node, text):
1766 def cmp(self, node, text):
1767 """compare text with a given file revision
1767 """compare text with a given file revision
1768
1768
1769 returns True if text is different than what is stored.
1769 returns True if text is different than what is stored.
1770 """
1770 """
1771 p1, p2 = self.parents(node)
1771 p1, p2 = self.parents(node)
1772 return storageutil.hashrevisionsha1(text, p1, p2) != node
1772 return storageutil.hashrevisionsha1(text, p1, p2) != node
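# Note (assumption about the helper): storageutil.hashrevisionsha1
# recomputes the node as the SHA-1 of the two parent nodes (in sorted
# order) followed by the text, so this comparison can detect a change
# without ever loading the stored revision data.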
1773
1773
1774 def _getsegmentforrevs(self, startrev, endrev, df=None):
1774 def _getsegmentforrevs(self, startrev, endrev, df=None):
1775 """Obtain a segment of raw data corresponding to a range of revisions.
1775 """Obtain a segment of raw data corresponding to a range of revisions.
1776
1776
1777 Accepts the start and end revisions and an optional already-open
1777 Accepts the start and end revisions and an optional already-open
1778 file handle to be used for reading. If the file handle is read, its
1778 file handle to be used for reading. If the file handle is read, its
1779 seek position will not be preserved.
1779 seek position will not be preserved.
1780
1780
1781 Requests for data may be satisfied by a cache.
1781 Requests for data may be satisfied by a cache.
1782
1782
1783 Returns a 2-tuple of (offset, data) for the requested range of
1783 Returns a 2-tuple of (offset, data) for the requested range of
1784 revisions. Offset is the integer offset from the beginning of the
1784 revisions. Offset is the integer offset from the beginning of the
1785 revlog and data is a str or buffer of the raw byte data.
1785 revlog and data is a str or buffer of the raw byte data.
1786
1786
1787 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1787 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1788 to determine where each revision's data begins and ends.
1788 to determine where each revision's data begins and ends.
1789 """
1789 """
1790 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1790 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1791 # (functions are expensive).
1791 # (functions are expensive).
1792 index = self.index
1792 index = self.index
1793 istart = index[startrev]
1793 istart = index[startrev]
1794 start = int(istart[0] >> 16)
1794 start = int(istart[0] >> 16)
1795 if startrev == endrev:
1795 if startrev == endrev:
1796 end = start + istart[1]
1796 end = start + istart[1]
1797 else:
1797 else:
1798 iend = index[endrev]
1798 iend = index[endrev]
1799 end = int(iend[0] >> 16) + iend[1]
1799 end = int(iend[0] >> 16) + iend[1]
1800
1800
1801 if self._inline:
1801 if self._inline:
1802 start += (startrev + 1) * self.index.entry_size
1802 start += (startrev + 1) * self.index.entry_size
1803 end += (endrev + 1) * self.index.entry_size
1803 end += (endrev + 1) * self.index.entry_size
1804 length = end - start
1804 length = end - start
1805
1805
1806 return start, self._segmentfile.read_chunk(start, length, df)
1806 return start, self._segmentfile.read_chunk(start, length, df)
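# The inline adjustment above reflects the single-file layout: in an
# inline revlog each revision's index entry is immediately followed by its
# data chunk, so the data of revision r sits (r + 1) * entry_size bytes
# past the offset recorded in the index.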
1807
1807
1808 def _chunk(self, rev, df=None):
1808 def _chunk(self, rev, df=None):
1809 """Obtain a single decompressed chunk for a revision.
1809 """Obtain a single decompressed chunk for a revision.
1810
1810
1811 Accepts an integer revision and an optional already-open file handle
1811 Accepts an integer revision and an optional already-open file handle
1812 to be used for reading. If used, the seek position of the file will not
1812 to be used for reading. If used, the seek position of the file will not
1813 be preserved.
1813 be preserved.
1814
1814
1815 Returns a str holding uncompressed data for the requested revision.
1815 Returns a str holding uncompressed data for the requested revision.
1816 """
1816 """
1817 compression_mode = self.index[rev][10]
1817 compression_mode = self.index[rev][10]
1818 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1818 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1819 if compression_mode == COMP_MODE_PLAIN:
1819 if compression_mode == COMP_MODE_PLAIN:
1820 return data
1820 return data
1821 elif compression_mode == COMP_MODE_DEFAULT:
1821 elif compression_mode == COMP_MODE_DEFAULT:
1822 return self._decompressor(data)
1822 return self._decompressor(data)
1823 elif compression_mode == COMP_MODE_INLINE:
1823 elif compression_mode == COMP_MODE_INLINE:
1824 return self.decompress(data)
1824 return self.decompress(data)
1825 else:
1825 else:
1826 msg = b'unknown compression mode %d'
1826 msg = b'unknown compression mode %d'
1827 msg %= compression_mode
1827 msg %= compression_mode
1828 raise error.RevlogError(msg)
1828 raise error.RevlogError(msg)
1829
1829
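# Editorial summary, not part of revlog.py: the three per-chunk compression
# modes dispatched on above map to:
#   COMP_MODE_PLAIN   - the stored bytes already are the uncompressed chunk
#   COMP_MODE_DEFAULT - decompress with the revlog-wide default decompressor,
#                       no per-chunk header byte is stored
#   COMP_MODE_INLINE  - the chunk carries its own header byte and goes
#                       through decompress() (b'u', b'\0', b'x', ...)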
1830 def _chunks(self, revs, df=None, targetsize=None):
1830 def _chunks(self, revs, df=None, targetsize=None):
1831 """Obtain decompressed chunks for the specified revisions.
1831 """Obtain decompressed chunks for the specified revisions.
1832
1832
1833 Accepts an iterable of numeric revisions that are assumed to be in
1833 Accepts an iterable of numeric revisions that are assumed to be in
1834 ascending order. Also accepts an optional already-open file handle
1834 ascending order. Also accepts an optional already-open file handle
1835 to be used for reading. If used, the seek position of the file will
1835 to be used for reading. If used, the seek position of the file will
1836 not be preserved.
1836 not be preserved.
1837
1837
1838 This function is similar to calling ``self._chunk()`` multiple times,
1838 This function is similar to calling ``self._chunk()`` multiple times,
1839 but is faster.
1839 but is faster.
1840
1840
1841 Returns a list with decompressed data for each requested revision.
1841 Returns a list with decompressed data for each requested revision.
1842 """
1842 """
1843 if not revs:
1843 if not revs:
1844 return []
1844 return []
1845 start = self.start
1845 start = self.start
1846 length = self.length
1846 length = self.length
1847 inline = self._inline
1847 inline = self._inline
1848 iosize = self.index.entry_size
1848 iosize = self.index.entry_size
1849 buffer = util.buffer
1849 buffer = util.buffer
1850
1850
1851 l = []
1851 l = []
1852 ladd = l.append
1852 ladd = l.append
1853
1853
1854 if not self._withsparseread:
1854 if not self._withsparseread:
1855 slicedchunks = (revs,)
1855 slicedchunks = (revs,)
1856 else:
1856 else:
1857 slicedchunks = deltautil.slicechunk(
1857 slicedchunks = deltautil.slicechunk(
1858 self, revs, targetsize=targetsize
1858 self, revs, targetsize=targetsize
1859 )
1859 )
1860
1860
1861 for revschunk in slicedchunks:
1861 for revschunk in slicedchunks:
1862 firstrev = revschunk[0]
1862 firstrev = revschunk[0]
1863 # Skip trailing revisions with empty diff
1863 # Skip trailing revisions with empty diff
1864 for lastrev in revschunk[::-1]:
1864 for lastrev in revschunk[::-1]:
1865 if length(lastrev) != 0:
1865 if length(lastrev) != 0:
1866 break
1866 break
1867
1867
1868 try:
1868 try:
1869 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1869 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1870 except OverflowError:
1870 except OverflowError:
1871 # issue4215 - we can't cache a run of chunks greater than
1871 # issue4215 - we can't cache a run of chunks greater than
1872 # 2G on Windows
1872 # 2G on Windows
1873 return [self._chunk(rev, df=df) for rev in revschunk]
1873 return [self._chunk(rev, df=df) for rev in revschunk]
1874
1874
1875 decomp = self.decompress
1875 decomp = self.decompress
1876 # self._decompressor might be None, but will not be used in that case
1876 # self._decompressor might be None, but will not be used in that case
1877 def_decomp = self._decompressor
1877 def_decomp = self._decompressor
1878 for rev in revschunk:
1878 for rev in revschunk:
1879 chunkstart = start(rev)
1879 chunkstart = start(rev)
1880 if inline:
1880 if inline:
1881 chunkstart += (rev + 1) * iosize
1881 chunkstart += (rev + 1) * iosize
1882 chunklength = length(rev)
1882 chunklength = length(rev)
1883 comp_mode = self.index[rev][10]
1883 comp_mode = self.index[rev][10]
1884 c = buffer(data, chunkstart - offset, chunklength)
1884 c = buffer(data, chunkstart - offset, chunklength)
1885 if comp_mode == COMP_MODE_PLAIN:
1885 if comp_mode == COMP_MODE_PLAIN:
1886 ladd(c)
1886 ladd(c)
1887 elif comp_mode == COMP_MODE_INLINE:
1887 elif comp_mode == COMP_MODE_INLINE:
1888 ladd(decomp(c))
1888 ladd(decomp(c))
1889 elif comp_mode == COMP_MODE_DEFAULT:
1889 elif comp_mode == COMP_MODE_DEFAULT:
1890 ladd(def_decomp(c))
1890 ladd(def_decomp(c))
1891 else:
1891 else:
1892 msg = b'unknown compression mode %d'
1892 msg = b'unknown compression mode %d'
1893 msg %= comp_mode
1893 msg %= comp_mode
1894 raise error.RevlogError(msg)
1894 raise error.RevlogError(msg)
1895
1895
1896 return l
1896 return l
1897
1897
1898 def deltaparent(self, rev):
1898 def deltaparent(self, rev):
1899 """return deltaparent of the given revision"""
1899 """return deltaparent of the given revision"""
1900 base = self.index[rev][3]
1900 base = self.index[rev][3]
1901 if base == rev:
1901 if base == rev:
1902 return nullrev
1902 return nullrev
1903 elif self._generaldelta:
1903 elif self._generaldelta:
1904 return base
1904 return base
1905 else:
1905 else:
1906 return rev - 1
1906 return rev - 1
1907
1907
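# Illustrative sketch, not part of revlog.py: walking delta parents until a
# full snapshot (base == nullrev) is reached. The revlog itself uses the
# faster _deltachain() for this; `nullrev` is the module-level constant.
def naive_deltachain(rl, rev):
    chain = [rev]
    while rl.deltaparent(chain[-1]) != nullrev:
        chain.append(rl.deltaparent(chain[-1]))
    chain.reverse()
    return chain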
1908 def issnapshot(self, rev):
1908 def issnapshot(self, rev):
1909 """tells whether rev is a snapshot"""
1909 """tells whether rev is a snapshot"""
1910 if not self._sparserevlog:
1910 if not self._sparserevlog:
1911 return self.deltaparent(rev) == nullrev
1911 return self.deltaparent(rev) == nullrev
1912 elif hasattr(self.index, 'issnapshot'):
1912 elif hasattr(self.index, 'issnapshot'):
1913 # directly assign the method to cache the testing and access
1913 # directly assign the method to cache the testing and access
1914 self.issnapshot = self.index.issnapshot
1914 self.issnapshot = self.index.issnapshot
1915 return self.issnapshot(rev)
1915 return self.issnapshot(rev)
1916 if rev == nullrev:
1916 if rev == nullrev:
1917 return True
1917 return True
1918 entry = self.index[rev]
1918 entry = self.index[rev]
1919 base = entry[3]
1919 base = entry[3]
1920 if base == rev:
1920 if base == rev:
1921 return True
1921 return True
1922 if base == nullrev:
1922 if base == nullrev:
1923 return True
1923 return True
1924 p1 = entry[5]
1924 p1 = entry[5]
1925 while self.length(p1) == 0:
1925 while self.length(p1) == 0:
1926 b = self.deltaparent(p1)
1926 b = self.deltaparent(p1)
1927 if b == p1:
1927 if b == p1:
1928 break
1928 break
1929 p1 = b
1929 p1 = b
1930 p2 = entry[6]
1930 p2 = entry[6]
1931 while self.length(p2) == 0:
1931 while self.length(p2) == 0:
1932 b = self.deltaparent(p2)
1932 b = self.deltaparent(p2)
1933 if b == p2:
1933 if b == p2:
1934 break
1934 break
1935 p2 = b
1935 p2 = b
1936 if base == p1 or base == p2:
1936 if base == p1 or base == p2:
1937 return False
1937 return False
1938 return self.issnapshot(base)
1938 return self.issnapshot(base)
1939
1939
1940 def snapshotdepth(self, rev):
1940 def snapshotdepth(self, rev):
1941 """number of snapshots in the chain before this one"""
1941 """number of snapshots in the chain before this one"""
1942 if not self.issnapshot(rev):
1942 if not self.issnapshot(rev):
1943 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1943 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1944 return len(self._deltachain(rev)[0]) - 1
1944 return len(self._deltachain(rev)[0]) - 1
1945
1945
1946 def revdiff(self, rev1, rev2):
1946 def revdiff(self, rev1, rev2):
1947 """return or calculate a delta between two revisions
1947 """return or calculate a delta between two revisions
1948
1948
1949 The delta calculated is in binary form and is intended to be written to
1949 The delta calculated is in binary form and is intended to be written to
1950 revlog data directly. So this function needs raw revision data.
1950 revlog data directly. So this function needs raw revision data.
1951 """
1951 """
1952 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1952 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1953 return bytes(self._chunk(rev2))
1953 return bytes(self._chunk(rev2))
1954
1954
1955 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1955 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1956
1956
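# Illustrative sketch, not part of revlog.py: a delta produced by revdiff()
# can be replayed with mdiff.patches() to rebuild the target raw text.
# `rl`, `rev1` and `rev2` are assumed to be valid in the caller's scope.
def revdiff_roundtrip_ok(rl, rev1, rev2):
    delta = rl.revdiff(rev1, rev2)
    return mdiff.patches(rl.rawdata(rev1), [delta]) == rl.rawdata(rev2)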
1957 def revision(self, nodeorrev, _df=None):
1957 def revision(self, nodeorrev, _df=None):
1958 """return an uncompressed revision of a given node or revision
1958 """return an uncompressed revision of a given node or revision
1959 number.
1959 number.
1960
1960
1961 _df - an existing file handle to read from. (internal-only)
1961 _df - an existing file handle to read from. (internal-only)
1962 """
1962 """
1963 return self._revisiondata(nodeorrev, _df)
1963 return self._revisiondata(nodeorrev, _df)
1964
1964
1965 def sidedata(self, nodeorrev, _df=None):
1965 def sidedata(self, nodeorrev, _df=None):
1966 """a map of extra data related to the changeset but not part of the hash
1966 """a map of extra data related to the changeset but not part of the hash
1967
1967
1968 This function currently returns a dictionary. However, a more advanced
1968 This function currently returns a dictionary. However, a more advanced
1969 mapping object will likely be used in the future for more
1969 mapping object will likely be used in the future for more
1970 efficient/lazy code.
1970 efficient/lazy code.
1971 """
1971 """
1972 # deal with <nodeorrev> argument type
1972 # deal with <nodeorrev> argument type
1973 if isinstance(nodeorrev, int):
1973 if isinstance(nodeorrev, int):
1974 rev = nodeorrev
1974 rev = nodeorrev
1975 else:
1975 else:
1976 rev = self.rev(nodeorrev)
1976 rev = self.rev(nodeorrev)
1977 return self._sidedata(rev)
1977 return self._sidedata(rev)
1978
1978
1979 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1979 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1980 # deal with <nodeorrev> argument type
1980 # deal with <nodeorrev> argument type
1981 if isinstance(nodeorrev, int):
1981 if isinstance(nodeorrev, int):
1982 rev = nodeorrev
1982 rev = nodeorrev
1983 node = self.node(rev)
1983 node = self.node(rev)
1984 else:
1984 else:
1985 node = nodeorrev
1985 node = nodeorrev
1986 rev = None
1986 rev = None
1987
1987
1988 # fast path the special `nullid` rev
1988 # fast path the special `nullid` rev
1989 if node == self.nullid:
1989 if node == self.nullid:
1990 return b""
1990 return b""
1991
1991
1992 # ``rawtext`` is the text as stored inside the revlog. Might be the
1992 # ``rawtext`` is the text as stored inside the revlog. Might be the
1993 # revision or might need to be processed to retrieve the revision.
1993 # revision or might need to be processed to retrieve the revision.
1994 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1994 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1995
1995
1996 if raw and validated:
1996 if raw and validated:
1997 # if we don't want to process the raw text and that raw
1997 # if we don't want to process the raw text and that raw
1998 # text is cached, we can exit early.
1998 # text is cached, we can exit early.
1999 return rawtext
1999 return rawtext
2000 if rev is None:
2000 if rev is None:
2001 rev = self.rev(node)
2001 rev = self.rev(node)
2002 # the revlog's flag for this revision
2002 # the revlog's flag for this revision
2003 # (usually alter its state or content)
2003 # (usually alter its state or content)
2004 flags = self.flags(rev)
2004 flags = self.flags(rev)
2005
2005
2006 if validated and flags == REVIDX_DEFAULT_FLAGS:
2006 if validated and flags == REVIDX_DEFAULT_FLAGS:
2007 # no extra flags set, no flag processor runs, text = rawtext
2007 # no extra flags set, no flag processor runs, text = rawtext
2008 return rawtext
2008 return rawtext
2009
2009
2010 if raw:
2010 if raw:
2011 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2011 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2012 text = rawtext
2012 text = rawtext
2013 else:
2013 else:
2014 r = flagutil.processflagsread(self, rawtext, flags)
2014 r = flagutil.processflagsread(self, rawtext, flags)
2015 text, validatehash = r
2015 text, validatehash = r
2016 if validatehash:
2016 if validatehash:
2017 self.checkhash(text, node, rev=rev)
2017 self.checkhash(text, node, rev=rev)
2018 if not validated:
2018 if not validated:
2019 self._revisioncache = (node, rev, rawtext)
2019 self._revisioncache = (node, rev, rawtext)
2020
2020
2021 return text
2021 return text
2022
2022
2023 def _rawtext(self, node, rev, _df=None):
2023 def _rawtext(self, node, rev, _df=None):
2024 """return the possibly unvalidated rawtext for a revision
2024 """return the possibly unvalidated rawtext for a revision
2025
2025
2026 returns (rev, rawtext, validated)
2026 returns (rev, rawtext, validated)
2027 """
2027 """
2028
2028
2029 # revision in the cache (could be useful to apply delta)
2029 # revision in the cache (could be useful to apply delta)
2030 cachedrev = None
2030 cachedrev = None
2031 # An intermediate text to apply deltas to
2031 # An intermediate text to apply deltas to
2032 basetext = None
2032 basetext = None
2033
2033
2034 # Check if we have the entry in cache
2034 # Check if we have the entry in cache
2035 # The cache entry looks like (node, rev, rawtext)
2035 # The cache entry looks like (node, rev, rawtext)
2036 if self._revisioncache:
2036 if self._revisioncache:
2037 if self._revisioncache[0] == node:
2037 if self._revisioncache[0] == node:
2038 return (rev, self._revisioncache[2], True)
2038 return (rev, self._revisioncache[2], True)
2039 cachedrev = self._revisioncache[1]
2039 cachedrev = self._revisioncache[1]
2040
2040
2041 if rev is None:
2041 if rev is None:
2042 rev = self.rev(node)
2042 rev = self.rev(node)
2043
2043
2044 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2044 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2045 if stopped:
2045 if stopped:
2046 basetext = self._revisioncache[2]
2046 basetext = self._revisioncache[2]
2047
2047
2048 # drop cache to save memory, the caller is expected to
2048 # drop cache to save memory, the caller is expected to
2049 # update self._revisioncache after validating the text
2049 # update self._revisioncache after validating the text
2050 self._revisioncache = None
2050 self._revisioncache = None
2051
2051
2052 targetsize = None
2052 targetsize = None
2053 rawsize = self.index[rev][2]
2053 rawsize = self.index[rev][2]
2054 if 0 <= rawsize:
2054 if 0 <= rawsize:
2055 targetsize = 4 * rawsize
2055 targetsize = 4 * rawsize
2056
2056
2057 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2057 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2058 if basetext is None:
2058 if basetext is None:
2059 basetext = bytes(bins[0])
2059 basetext = bytes(bins[0])
2060 bins = bins[1:]
2060 bins = bins[1:]
2061
2061
2062 rawtext = mdiff.patches(basetext, bins)
2062 rawtext = mdiff.patches(basetext, bins)
2063 del basetext # let us have a chance to free memory early
2063 del basetext # let us have a chance to free memory early
2064 return (rev, rawtext, False)
2064 return (rev, rawtext, False)
2065
2065
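# Condensed restatement (editorial, not part of revlog.py) of the cache-miss
# path above: fetch every chunk of the delta chain and fold the deltas onto
# the base text.
def rebuild_rawtext(rl, rev):
    chain, _stopped = rl._deltachain(rev)
    bins = rl._chunks(chain)
    return mdiff.patches(bytes(bins[0]), bins[1:])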
2066 def _sidedata(self, rev):
2066 def _sidedata(self, rev):
2067 """Return the sidedata for a given revision number."""
2067 """Return the sidedata for a given revision number."""
2068 index_entry = self.index[rev]
2068 index_entry = self.index[rev]
2069 sidedata_offset = index_entry[8]
2069 sidedata_offset = index_entry[8]
2070 sidedata_size = index_entry[9]
2070 sidedata_size = index_entry[9]
2071
2071
2072 if self._inline:
2072 if self._inline:
2073 sidedata_offset += self.index.entry_size * (1 + rev)
2073 sidedata_offset += self.index.entry_size * (1 + rev)
2074 if sidedata_size == 0:
2074 if sidedata_size == 0:
2075 return {}
2075 return {}
2076
2076
2077 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2077 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2078 filename = self._sidedatafile
2078 filename = self._sidedatafile
2079 end = self._docket.sidedata_end
2079 end = self._docket.sidedata_end
2080 offset = sidedata_offset
2080 offset = sidedata_offset
2081 length = sidedata_size
2081 length = sidedata_size
2082 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2082 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2083 raise error.RevlogError(m)
2083 raise error.RevlogError(m)
2084
2084
2085 comp_segment = self._segmentfile_sidedata.read_chunk(
2085 comp_segment = self._segmentfile_sidedata.read_chunk(
2086 sidedata_offset, sidedata_size
2086 sidedata_offset, sidedata_size
2087 )
2087 )
2088
2088
2089 comp = self.index[rev][11]
2089 comp = self.index[rev][11]
2090 if comp == COMP_MODE_PLAIN:
2090 if comp == COMP_MODE_PLAIN:
2091 segment = comp_segment
2091 segment = comp_segment
2092 elif comp == COMP_MODE_DEFAULT:
2092 elif comp == COMP_MODE_DEFAULT:
2093 segment = self._decompressor(comp_segment)
2093 segment = self._decompressor(comp_segment)
2094 elif comp == COMP_MODE_INLINE:
2094 elif comp == COMP_MODE_INLINE:
2095 segment = self.decompress(comp_segment)
2095 segment = self.decompress(comp_segment)
2096 else:
2096 else:
2097 msg = b'unknown compression mode %d'
2097 msg = b'unknown compression mode %d'
2098 msg %= comp
2098 msg %= comp
2099 raise error.RevlogError(msg)
2099 raise error.RevlogError(msg)
2100
2100
2101 sidedata = sidedatautil.deserialize_sidedata(segment)
2101 sidedata = sidedatautil.deserialize_sidedata(segment)
2102 return sidedata
2102 return sidedata
2103
2103
2104 def rawdata(self, nodeorrev, _df=None):
2104 def rawdata(self, nodeorrev, _df=None):
2105 """return the uncompressed raw data of a given node or revision number.
2105 """return the uncompressed raw data of a given node or revision number.
2106
2106
2107 _df - an existing file handle to read from. (internal-only)
2107 _df - an existing file handle to read from. (internal-only)
2108 """
2108 """
2109 return self._revisiondata(nodeorrev, _df, raw=True)
2109 return self._revisiondata(nodeorrev, _df, raw=True)
2110
2110
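# Editorial note, not part of revlog.py: for revisions without special flags
# (no censoring, no ellipsis, ...) the processed text and the stored raw data
# are the same bytes; flag processors only make the two diverge when flags
# are set.
def texts_agree(rl, node):
    return rl.revision(node) == rl.rawdata(node)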
2111 def hash(self, text, p1, p2):
2111 def hash(self, text, p1, p2):
2112 """Compute a node hash.
2112 """Compute a node hash.
2113
2113
2114 Available as a function so that subclasses can replace the hash
2114 Available as a function so that subclasses can replace the hash
2115 as needed.
2115 as needed.
2116 """
2116 """
2117 return storageutil.hashrevisionsha1(text, p1, p2)
2117 return storageutil.hashrevisionsha1(text, p1, p2)
2118
2118
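# Sketch of the hash convention behind storageutil.hashrevisionsha1
# (editorial, not part of revlog.py; assuming the standard SHA-1 node
# scheme): the two parent nodes are sorted, then hashed together with the
# text.
import hashlib

def sketch_node_sha1(text, p1, p2):
    s = hashlib.sha1(min(p1, p2))
    s.update(max(p1, p2))
    s.update(text)
    return s.digest()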
2119 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2119 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2120 """Check node hash integrity.
2120 """Check node hash integrity.
2121
2121
2122 Available as a function so that subclasses can extend hash mismatch
2122 Available as a function so that subclasses can extend hash mismatch
2123 behaviors as needed.
2123 behaviors as needed.
2124 """
2124 """
2125 try:
2125 try:
2126 if p1 is None and p2 is None:
2126 if p1 is None and p2 is None:
2127 p1, p2 = self.parents(node)
2127 p1, p2 = self.parents(node)
2128 if node != self.hash(text, p1, p2):
2128 if node != self.hash(text, p1, p2):
2129 # Clear the revision cache on hash failure. The revision cache
2129 # Clear the revision cache on hash failure. The revision cache
2130 # only stores the raw revision and clearing the cache does have
2130 # only stores the raw revision and clearing the cache does have
2131 # the side-effect that we won't have a cache hit when the raw
2131 # the side-effect that we won't have a cache hit when the raw
2132 # revision data is accessed. But this case should be rare and
2132 # revision data is accessed. But this case should be rare and
2133 # it is extra work to teach the cache about the hash
2133 # it is extra work to teach the cache about the hash
2134 # verification state.
2134 # verification state.
2135 if self._revisioncache and self._revisioncache[0] == node:
2135 if self._revisioncache and self._revisioncache[0] == node:
2136 self._revisioncache = None
2136 self._revisioncache = None
2137
2137
2138 revornode = rev
2138 revornode = rev
2139 if revornode is None:
2139 if revornode is None:
2140 revornode = templatefilters.short(hex(node))
2140 revornode = templatefilters.short(hex(node))
2141 raise error.RevlogError(
2141 raise error.RevlogError(
2142 _(b"integrity check failed on %s:%s")
2142 _(b"integrity check failed on %s:%s")
2143 % (self.display_id, pycompat.bytestr(revornode))
2143 % (self.display_id, pycompat.bytestr(revornode))
2144 )
2144 )
2145 except error.RevlogError:
2145 except error.RevlogError:
2146 if self._censorable and storageutil.iscensoredtext(text):
2146 if self._censorable and storageutil.iscensoredtext(text):
2147 raise error.CensoredNodeError(self.display_id, node, text)
2147 raise error.CensoredNodeError(self.display_id, node, text)
2148 raise
2148 raise
2149
2149
2150 @property
2150 @property
2151 def _split_index_file(self):
2151 def _split_index_file(self):
2152 """the path at which to expect the index of an ongoing splitting operation
2152 """the path at which to expect the index of an ongoing splitting operation
2153
2153
2154 The file will only exist if a splitting operation is in progress, but
2154 The file will only exist if a splitting operation is in progress, but
2155 it is always expected at the same location."""
2155 it is always expected at the same location."""
2156 parts = self.radix.split(b'/')
2156 parts = self.radix.split(b'/')
2157 if len(parts) > 1:
2157 if len(parts) > 1:
2158 # adds a '-s' suffix to the ``data/`` or ``meta/`` base
2158 # adds a '-s' suffix to the ``data/`` or ``meta/`` base
2159 head = parts[0] + b'-s'
2159 head = parts[0] + b'-s'
2160 mids = parts[1:-1]
2160 mids = parts[1:-1]
2161 tail = parts[-1] + b'.i'
2161 tail = parts[-1] + b'.i'
2162 pieces = [head] + mids + [tail]
2162 pieces = [head] + mids + [tail]
2163 return b'/'.join(pieces)
2163 return b'/'.join(pieces)
2164 else:
2164 else:
2165 # the revlog is stored at the root of the store (changelog or
2165 # the revlog is stored at the root of the store (changelog or
2166 # manifest), no risk of collision.
2166 # manifest), no risk of collision.
2167 return self.radix + b'.i.s'
2167 return self.radix + b'.i.s'
2168
2168
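# Editorial examples, not part of revlog.py: what the property above yields
# for typical radix values.
#   radix b'data/some/file'  -> b'data-s/some/file.i'
#   radix b'meta/some/dir'   -> b'meta-s/some/dir.i'
#   radix b'00changelog'     -> b'00changelog.i.s'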
2169 def _enforceinlinesize(self, tr, side_write=True):
2169 def _enforceinlinesize(self, tr, side_write=True):
2170 """Check if the revlog is too big for inline and convert if so.
2170 """Check if the revlog is too big for inline and convert if so.
2171
2171
2172 This should be called after revisions are added to the revlog. If the
2172 This should be called after revisions are added to the revlog. If the
2173 revlog has grown too large to be an inline revlog, it will convert it
2173 revlog has grown too large to be an inline revlog, it will convert it
2174 to use multiple index and data files.
2174 to use multiple index and data files.
2175 """
2175 """
2176 tiprev = len(self) - 1
2176 tiprev = len(self) - 1
2177 total_size = self.start(tiprev) + self.length(tiprev)
2177 total_size = self.start(tiprev) + self.length(tiprev)
2178 if not self._inline or total_size < _maxinline:
2178 if not self._inline or total_size < _maxinline:
2179 return
2179 return
2180
2180
2181 troffset = tr.findoffset(self._indexfile)
2181 troffset = tr.findoffset(self._indexfile)
2182 if troffset is None:
2182 if troffset is None:
2183 raise error.RevlogError(
2183 raise error.RevlogError(
2184 _(b"%s not found in the transaction") % self._indexfile
2184 _(b"%s not found in the transaction") % self._indexfile
2185 )
2185 )
2186 if troffset:
2186 if troffset:
2187 tr.addbackup(self._indexfile, for_offset=True)
2187 tr.addbackup(self._indexfile, for_offset=True)
2188 tr.add(self._datafile, 0)
2188 tr.add(self._datafile, 0)
2189
2189
2190 existing_handles = False
2190 existing_handles = False
2191 if self._writinghandles is not None:
2191 if self._writinghandles is not None:
2192 existing_handles = True
2192 existing_handles = True
2193 fp = self._writinghandles[0]
2193 fp = self._writinghandles[0]
2194 fp.flush()
2194 fp.flush()
2195 fp.close()
2195 fp.close()
2196 # We can't use the cached file handle after close(). So prevent
2196 # We can't use the cached file handle after close(). So prevent
2197 # its usage.
2197 # its usage.
2198 self._writinghandles = None
2198 self._writinghandles = None
2199 self._segmentfile.writing_handle = None
2199 self._segmentfile.writing_handle = None
2200 # No need to deal with sidedata writing handle as it is only
2200 # No need to deal with sidedata writing handle as it is only
2201 # relevant with revlog-v2 which is never inline, not reaching
2201 # relevant with revlog-v2 which is never inline, not reaching
2202 # this code
2202 # this code
2203 if side_write:
2203 if side_write:
2204 old_index_file_path = self._indexfile
2204 old_index_file_path = self._indexfile
2205 new_index_file_path = self._split_index_file
2205 new_index_file_path = self._split_index_file
2206 opener = self.opener
2206 opener = self.opener
2207 weak_self = weakref.ref(self)
2207 weak_self = weakref.ref(self)
2208
2208
2209 # the "split" index replaces the real index when the transaction is finalized
2209 # the "split" index replaces the real index when the transaction is finalized
2210 def finalize_callback(tr):
2210 def finalize_callback(tr):
2211 opener.rename(
2211 opener.rename(
2212 new_index_file_path,
2212 new_index_file_path,
2213 old_index_file_path,
2213 old_index_file_path,
2214 checkambig=True,
2214 checkambig=True,
2215 )
2215 )
2216 maybe_self = weak_self()
2216 maybe_self = weak_self()
2217 if maybe_self is not None:
2217 if maybe_self is not None:
2218 maybe_self._indexfile = old_index_file_path
2218 maybe_self._indexfile = old_index_file_path
2219
2219
2220 def abort_callback(tr):
2220 def abort_callback(tr):
2221 maybe_self = weak_self()
2221 maybe_self = weak_self()
2222 if maybe_self is not None:
2222 if maybe_self is not None:
2223 maybe_self._indexfile = old_index_file_path
2223 maybe_self._indexfile = old_index_file_path
2224
2224
2225 tr.registertmp(new_index_file_path)
2225 tr.registertmp(new_index_file_path)
2226 if self.target[1] is not None:
2226 if self.target[1] is not None:
2227 callback_id = b'000-revlog-split-%d-%s' % self.target
2227 callback_id = b'000-revlog-split-%d-%s' % self.target
2228 else:
2228 else:
2229 callback_id = b'000-revlog-split-%d' % self.target[0]
2229 callback_id = b'000-revlog-split-%d' % self.target[0]
2230 tr.addfinalize(callback_id, finalize_callback)
2230 tr.addfinalize(callback_id, finalize_callback)
2231 tr.addabort(callback_id, abort_callback)
2231 tr.addabort(callback_id, abort_callback)
2232
2232
2233 new_dfh = self._datafp(b'w+')
2233 new_dfh = self._datafp(b'w+')
2234 new_dfh.truncate(0) # drop any potentially existing data
2234 new_dfh.truncate(0) # drop any potentially existing data
2235 try:
2235 try:
2236 with self._indexfp() as read_ifh:
2236 with self._indexfp() as read_ifh:
2237 for r in self:
2237 for r in self:
2238 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2238 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2239 new_dfh.flush()
2239 new_dfh.flush()
2240
2240
2241 if side_write:
2241 if side_write:
2242 self._indexfile = new_index_file_path
2242 self._indexfile = new_index_file_path
2243 with self.__index_new_fp() as fp:
2243 with self.__index_new_fp() as fp:
2244 self._format_flags &= ~FLAG_INLINE_DATA
2244 self._format_flags &= ~FLAG_INLINE_DATA
2245 self._inline = False
2245 self._inline = False
2246 for i in self:
2246 for i in self:
2247 e = self.index.entry_binary(i)
2247 e = self.index.entry_binary(i)
2248 if i == 0 and self._docket is None:
2248 if i == 0 and self._docket is None:
2249 header = self._format_flags | self._format_version
2249 header = self._format_flags | self._format_version
2250 header = self.index.pack_header(header)
2250 header = self.index.pack_header(header)
2251 e = header + e
2251 e = header + e
2252 fp.write(e)
2252 fp.write(e)
2253 if self._docket is not None:
2253 if self._docket is not None:
2254 self._docket.index_end = fp.tell()
2254 self._docket.index_end = fp.tell()
2255
2255
2256 # If we don't use side-write, the temp file replaces the real
2256 # If we don't use side-write, the temp file replaces the real
2257 # index when we exit the context manager
2257 # index when we exit the context manager
2258
2258
2259 nodemaputil.setup_persistent_nodemap(tr, self)
2259 nodemaputil.setup_persistent_nodemap(tr, self)
2260 self._segmentfile = randomaccessfile.randomaccessfile(
2260 self._segmentfile = randomaccessfile.randomaccessfile(
2261 self.opener,
2261 self.opener,
2262 self._datafile,
2262 self._datafile,
2263 self._chunkcachesize,
2263 self._chunkcachesize,
2264 )
2264 )
2265
2265
2266 if existing_handles:
2266 if existing_handles:
2267 # switched from inline to conventional; reopen the index
2267 # switched from inline to conventional; reopen the index
2268 ifh = self.__index_write_fp()
2268 ifh = self.__index_write_fp()
2269 self._writinghandles = (ifh, new_dfh, None)
2269 self._writinghandles = (ifh, new_dfh, None)
2270 self._segmentfile.writing_handle = new_dfh
2270 self._segmentfile.writing_handle = new_dfh
2271 new_dfh = None
2271 new_dfh = None
2272 # No need to deal with sidedata writing handle as it is only
2272 # No need to deal with sidedata writing handle as it is only
2273 # relevant with revlog-v2 which is never inline, not reaching
2273 # relevant with revlog-v2 which is never inline, not reaching
2274 # this code
2274 # this code
2275 finally:
2275 finally:
2276 if new_dfh is not None:
2276 if new_dfh is not None:
2277 new_dfh.close()
2277 new_dfh.close()
2278
2278
2279 def _nodeduplicatecallback(self, transaction, node):
2279 def _nodeduplicatecallback(self, transaction, node):
2280 """called when trying to add a node already stored."""
2280 """called when trying to add a node already stored."""
2281
2281
2282 @contextlib.contextmanager
2282 @contextlib.contextmanager
2283 def reading(self):
2283 def reading(self):
2284 """Context manager that keeps data and sidedata files open for reading"""
2284 """Context manager that keeps data and sidedata files open for reading"""
2285 if len(self.index) == 0:
2285 if len(self.index) == 0:
2286 yield # nothing to be read
2286 yield # nothing to be read
2287 else:
2287 else:
2288 with self._segmentfile.reading():
2288 with self._segmentfile.reading():
2289 with self._segmentfile_sidedata.reading():
2289 with self._segmentfile_sidedata.reading():
2290 yield
2290 yield
2291
2291
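# Illustrative sketch, not part of revlog.py: keeping the data and sidedata
# files open while iterating over many revisions, instead of reopening them
# for every call.
def all_texts(rl):
    with rl.reading():
        return [rl.revision(r) for r in rl]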
2292 @contextlib.contextmanager
2292 @contextlib.contextmanager
2293 def _writing(self, transaction):
2293 def _writing(self, transaction):
2294 if self._trypending:
2294 if self._trypending:
2295 msg = b'try to write in a `trypending` revlog: %s'
2295 msg = b'try to write in a `trypending` revlog: %s'
2296 msg %= self.display_id
2296 msg %= self.display_id
2297 raise error.ProgrammingError(msg)
2297 raise error.ProgrammingError(msg)
2298 if self._writinghandles is not None:
2298 if self._writinghandles is not None:
2299 yield
2299 yield
2300 else:
2300 else:
2301 ifh = dfh = sdfh = None
2301 ifh = dfh = sdfh = None
2302 try:
2302 try:
2303 r = len(self)
2303 r = len(self)
2304 # opening the data file.
2304 # opening the data file.
2305 dsize = 0
2305 dsize = 0
2306 if r:
2306 if r:
2307 dsize = self.end(r - 1)
2307 dsize = self.end(r - 1)
2308 dfh = None
2308 dfh = None
2309 if not self._inline:
2309 if not self._inline:
2310 try:
2310 try:
2311 dfh = self._datafp(b"r+")
2311 dfh = self._datafp(b"r+")
2312 if self._docket is None:
2312 if self._docket is None:
2313 dfh.seek(0, os.SEEK_END)
2313 dfh.seek(0, os.SEEK_END)
2314 else:
2314 else:
2315 dfh.seek(self._docket.data_end, os.SEEK_SET)
2315 dfh.seek(self._docket.data_end, os.SEEK_SET)
2316 except FileNotFoundError:
2316 except FileNotFoundError:
2317 dfh = self._datafp(b"w+")
2317 dfh = self._datafp(b"w+")
2318 transaction.add(self._datafile, dsize)
2318 transaction.add(self._datafile, dsize)
2319 if self._sidedatafile is not None:
2319 if self._sidedatafile is not None:
2320 # revlog-v2 does not inline, help Pytype
2320 # revlog-v2 does not inline, help Pytype
2321 assert dfh is not None
2321 assert dfh is not None
2322 try:
2322 try:
2323 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2323 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2324 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2324 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2325 except FileNotFoundError:
2325 except FileNotFoundError:
2326 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2326 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2327 transaction.add(
2327 transaction.add(
2328 self._sidedatafile, self._docket.sidedata_end
2328 self._sidedatafile, self._docket.sidedata_end
2329 )
2329 )
2330
2330
2331 # opening the index file.
2331 # opening the index file.
2332 isize = r * self.index.entry_size
2332 isize = r * self.index.entry_size
2333 ifh = self.__index_write_fp()
2333 ifh = self.__index_write_fp()
2334 if self._inline:
2334 if self._inline:
2335 transaction.add(self._indexfile, dsize + isize)
2335 transaction.add(self._indexfile, dsize + isize)
2336 else:
2336 else:
2337 transaction.add(self._indexfile, isize)
2337 transaction.add(self._indexfile, isize)
2338 # exposing all file handles for writing.
2338 # exposing all file handles for writing.
2339 self._writinghandles = (ifh, dfh, sdfh)
2339 self._writinghandles = (ifh, dfh, sdfh)
2340 self._segmentfile.writing_handle = ifh if self._inline else dfh
2340 self._segmentfile.writing_handle = ifh if self._inline else dfh
2341 self._segmentfile_sidedata.writing_handle = sdfh
2341 self._segmentfile_sidedata.writing_handle = sdfh
2342 yield
2342 yield
2343 if self._docket is not None:
2343 if self._docket is not None:
2344 self._write_docket(transaction)
2344 self._write_docket(transaction)
2345 finally:
2345 finally:
2346 self._writinghandles = None
2346 self._writinghandles = None
2347 self._segmentfile.writing_handle = None
2347 self._segmentfile.writing_handle = None
2348 self._segmentfile_sidedata.writing_handle = None
2348 self._segmentfile_sidedata.writing_handle = None
2349 if dfh is not None:
2349 if dfh is not None:
2350 dfh.close()
2350 dfh.close()
2351 if sdfh is not None:
2351 if sdfh is not None:
2352 sdfh.close()
2352 sdfh.close()
2353 # closing the index file last to avoid exposing references to
2353 # closing the index file last to avoid exposing references to
2354 # potentially unflushed data content.
2354 # potentially unflushed data content.
2355 if ifh is not None:
2355 if ifh is not None:
2356 ifh.close()
2356 ifh.close()
2357
2357
2358 def _write_docket(self, transaction):
2358 def _write_docket(self, transaction):
2359 """write the current docket on disk
2359 """write the current docket on disk
2360
2360
2361 Exists as a method to help the changelog implement transaction logic
2361 Exists as a method to help the changelog implement transaction logic
2362
2362
2363 We could also imagine using the same transaction logic for all revlogs
2363 We could also imagine using the same transaction logic for all revlogs
2364 since dockets are cheap."""
2364 since dockets are cheap."""
2365 self._docket.write(transaction)
2365 self._docket.write(transaction)
2366
2366
2367 def addrevision(
2367 def addrevision(
2368 self,
2368 self,
2369 text,
2369 text,
2370 transaction,
2370 transaction,
2371 link,
2371 link,
2372 p1,
2372 p1,
2373 p2,
2373 p2,
2374 cachedelta=None,
2374 cachedelta=None,
2375 node=None,
2375 node=None,
2376 flags=REVIDX_DEFAULT_FLAGS,
2376 flags=REVIDX_DEFAULT_FLAGS,
2377 deltacomputer=None,
2377 deltacomputer=None,
2378 sidedata=None,
2378 sidedata=None,
2379 ):
2379 ):
2380 """add a revision to the log
2380 """add a revision to the log
2381
2381
2382 text - the revision data to add
2382 text - the revision data to add
2383 transaction - the transaction object used for rollback
2383 transaction - the transaction object used for rollback
2384 link - the linkrev data to add
2384 link - the linkrev data to add
2385 p1, p2 - the parent nodeids of the revision
2385 p1, p2 - the parent nodeids of the revision
2386 cachedelta - an optional precomputed delta
2386 cachedelta - an optional precomputed delta
2387 node - nodeid of revision; typically node is not specified, and it is
2387 node - nodeid of revision; typically node is not specified, and it is
2388 computed by default as hash(text, p1, p2), however subclasses might
2388 computed by default as hash(text, p1, p2), however subclasses might
2389 use a different hashing method (and override checkhash() in such a case)
2389 use a different hashing method (and override checkhash() in such a case)
2390 flags - the known flags to set on the revision
2390 flags - the known flags to set on the revision
2391 deltacomputer - an optional deltacomputer instance shared between
2391 deltacomputer - an optional deltacomputer instance shared between
2392 multiple calls
2392 multiple calls
2393 """
2393 """
2394 if link == nullrev:
2394 if link == nullrev:
2395 raise error.RevlogError(
2395 raise error.RevlogError(
2396 _(b"attempted to add linkrev -1 to %s") % self.display_id
2396 _(b"attempted to add linkrev -1 to %s") % self.display_id
2397 )
2397 )
2398
2398
2399 if sidedata is None:
2399 if sidedata is None:
2400 sidedata = {}
2400 sidedata = {}
2401 elif sidedata and not self.hassidedata:
2401 elif sidedata and not self.hassidedata:
2402 raise error.ProgrammingError(
2402 raise error.ProgrammingError(
2403 _(b"trying to add sidedata to a revlog that does not support it")
2403 _(b"trying to add sidedata to a revlog that does not support it")
2404 )
2404 )
2405
2405
2406 if flags:
2406 if flags:
2407 node = node or self.hash(text, p1, p2)
2407 node = node or self.hash(text, p1, p2)
2408
2408
2409 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2409 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2410
2410
2411 # If the flag processor modifies the revision data, ignore any provided
2411 # If the flag processor modifies the revision data, ignore any provided
2412 # cachedelta.
2412 # cachedelta.
2413 if rawtext != text:
2413 if rawtext != text:
2414 cachedelta = None
2414 cachedelta = None
2415
2415
2416 if len(rawtext) > _maxentrysize:
2416 if len(rawtext) > _maxentrysize:
2417 raise error.RevlogError(
2417 raise error.RevlogError(
2418 _(
2418 _(
2419 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2419 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2420 )
2420 )
2421 % (self.display_id, len(rawtext))
2421 % (self.display_id, len(rawtext))
2422 )
2422 )
2423
2423
2424 node = node or self.hash(rawtext, p1, p2)
2424 node = node or self.hash(rawtext, p1, p2)
2425 rev = self.index.get_rev(node)
2425 rev = self.index.get_rev(node)
2426 if rev is not None:
2426 if rev is not None:
2427 return rev
2427 return rev
2428
2428
2429 if validatehash:
2429 if validatehash:
2430 self.checkhash(rawtext, node, p1=p1, p2=p2)
2430 self.checkhash(rawtext, node, p1=p1, p2=p2)
2431
2431
2432 return self.addrawrevision(
2432 return self.addrawrevision(
2433 rawtext,
2433 rawtext,
2434 transaction,
2434 transaction,
2435 link,
2435 link,
2436 p1,
2436 p1,
2437 p2,
2437 p2,
2438 node,
2438 node,
2439 flags,
2439 flags,
2440 cachedelta=cachedelta,
2440 cachedelta=cachedelta,
2441 deltacomputer=deltacomputer,
2441 deltacomputer=deltacomputer,
2442 sidedata=sidedata,
2442 sidedata=sidedata,
2443 )
2443 )
2444
2444
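# Illustrative sketch, not part of revlog.py: the minimal call shape for
# addrevision(). `rl`, `tr` (an open transaction), `linkrev`, `p1node` and
# `p2node` are assumed to be provided by the caller.
def store_text(rl, tr, text, linkrev, p1node, p2node):
    return rl.addrevision(text, tr, linkrev, p1node, p2node)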
2445 def addrawrevision(
2445 def addrawrevision(
2446 self,
2446 self,
2447 rawtext,
2447 rawtext,
2448 transaction,
2448 transaction,
2449 link,
2449 link,
2450 p1,
2450 p1,
2451 p2,
2451 p2,
2452 node,
2452 node,
2453 flags,
2453 flags,
2454 cachedelta=None,
2454 cachedelta=None,
2455 deltacomputer=None,
2455 deltacomputer=None,
2456 sidedata=None,
2456 sidedata=None,
2457 ):
2457 ):
2458 """add a raw revision with known flags, node and parents
2458 """add a raw revision with known flags, node and parents
2459 useful when reusing a revision not stored in this revlog (ex: received
2459 useful when reusing a revision not stored in this revlog (ex: received
2460 over the wire, or read from an external bundle).
2460 over the wire, or read from an external bundle).
2461 """
2461 """
2462 with self._writing(transaction):
2462 with self._writing(transaction):
2463 return self._addrevision(
2463 return self._addrevision(
2464 node,
2464 node,
2465 rawtext,
2465 rawtext,
2466 transaction,
2466 transaction,
2467 link,
2467 link,
2468 p1,
2468 p1,
2469 p2,
2469 p2,
2470 flags,
2470 flags,
2471 cachedelta,
2471 cachedelta,
2472 deltacomputer=deltacomputer,
2472 deltacomputer=deltacomputer,
2473 sidedata=sidedata,
2473 sidedata=sidedata,
2474 )
2474 )
2475
2475
2476 def compress(self, data):
2476 def compress(self, data):
2477 """Generate a possibly-compressed representation of data."""
2477 """Generate a possibly-compressed representation of data."""
2478 if not data:
2478 if not data:
2479 return b'', data
2479 return b'', data
2480
2480
2481 compressed = self._compressor.compress(data)
2481 compressed = self._compressor.compress(data)
2482
2482
2483 if compressed:
2483 if compressed:
2484 # The revlog compressor added the header in the returned data.
2484 # The revlog compressor added the header in the returned data.
2485 return b'', compressed
2485 return b'', compressed
2486
2486
2487 if data[0:1] == b'\0':
2487 if data[0:1] == b'\0':
2488 return b'', data
2488 return b'', data
2489 return b'u', data
2489 return b'u', data
2490
2490
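# Illustrative sketch, not part of revlog.py: what gets written to disk is
# header + payload, and decompress() dispatches on that first byte to undo it.
def compress_roundtrip_ok(rl, chunk):
    header, packed = rl.compress(chunk)
    return bytes(rl.decompress(header + packed)) == chunk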
2491 def decompress(self, data):
2491 def decompress(self, data):
2492 """Decompress a revlog chunk.
2492 """Decompress a revlog chunk.
2493
2493
2494 The chunk is expected to begin with a header identifying the
2494 The chunk is expected to begin with a header identifying the
2495 format type so it can be routed to an appropriate decompressor.
2495 format type so it can be routed to an appropriate decompressor.
2496 """
2496 """
2497 if not data:
2497 if not data:
2498 return data
2498 return data
2499
2499
2500 # Revlogs are read much more frequently than they are written and many
2500 # Revlogs are read much more frequently than they are written and many
2501 # chunks only take microseconds to decompress, so performance is
2501 # chunks only take microseconds to decompress, so performance is
2502 # important here.
2502 # important here.
2503 #
2503 #
2504 # We can make a few assumptions about revlogs:
2504 # We can make a few assumptions about revlogs:
2505 #
2505 #
2506 # 1) the majority of chunks will be compressed (as opposed to inline
2506 # 1) the majority of chunks will be compressed (as opposed to inline
2507 # raw data).
2507 # raw data).
2508 # 2) decompressing *any* data will likely be at least 10x slower than
2508 # 2) decompressing *any* data will likely be at least 10x slower than
2509 # returning raw inline data.
2509 # returning raw inline data.
2510 # 3) we want to prioritize common and officially supported compression
2510 # 3) we want to prioritize common and officially supported compression
2511 # engines
2511 # engines
2512 #
2512 #
2513 # It follows that we want to optimize for "decompress compressed data
2513 # It follows that we want to optimize for "decompress compressed data
2514 # when encoded with common and officially supported compression engines"
2514 # when encoded with common and officially supported compression engines"
2515 # case over "raw data" and "data encoded by less common or non-official
2515 # case over "raw data" and "data encoded by less common or non-official
2516 # compression engines." That is why we have the inline lookup first
2516 # compression engines." That is why we have the inline lookup first
2517 # followed by the compengines lookup.
2517 # followed by the compengines lookup.
2518 #
2518 #
2519 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2519 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2520 # compressed chunks. And this matters for changelog and manifest reads.
2520 # compressed chunks. And this matters for changelog and manifest reads.
2521 t = data[0:1]
2521 t = data[0:1]
2522
2522
2523 if t == b'x':
2523 if t == b'x':
2524 try:
2524 try:
2525 return _zlibdecompress(data)
2525 return _zlibdecompress(data)
2526 except zlib.error as e:
2526 except zlib.error as e:
2527 raise error.RevlogError(
2527 raise error.RevlogError(
2528 _(b'revlog decompress error: %s')
2528 _(b'revlog decompress error: %s')
2529 % stringutil.forcebytestr(e)
2529 % stringutil.forcebytestr(e)
2530 )
2530 )
2531 # '\0' is more common than 'u' so it goes first.
2531 # '\0' is more common than 'u' so it goes first.
2532 elif t == b'\0':
2532 elif t == b'\0':
2533 return data
2533 return data
2534 elif t == b'u':
2534 elif t == b'u':
2535 return util.buffer(data, 1)
2535 return util.buffer(data, 1)
2536
2536
2537 compressor = self._get_decompressor(t)
2537 compressor = self._get_decompressor(t)
2538
2538
2539 return compressor.decompress(data)
2539 return compressor.decompress(data)
2540
2540
2541 def _addrevision(
2541 def _addrevision(
2542 self,
2542 self,
2543 node,
2543 node,
2544 rawtext,
2544 rawtext,
2545 transaction,
2545 transaction,
2546 link,
2546 link,
2547 p1,
2547 p1,
2548 p2,
2548 p2,
2549 flags,
2549 flags,
2550 cachedelta,
2550 cachedelta,
2551 alwayscache=False,
2551 alwayscache=False,
2552 deltacomputer=None,
2552 deltacomputer=None,
2553 sidedata=None,
2553 sidedata=None,
2554 ):
2554 ):
2555 """internal function to add revisions to the log
2555 """internal function to add revisions to the log
2556
2556
2557 see addrevision for argument descriptions.
2557 see addrevision for argument descriptions.
2558
2558
2559 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2559 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2560
2560
2561 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2561 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2562 be used.
2562 be used.
2563
2563
2564 invariants:
2564 invariants:
2565 - rawtext is optional (can be None); if not set, cachedelta must be set.
2565 - rawtext is optional (can be None); if not set, cachedelta must be set.
2566 if both are set, they must correspond to each other.
2566 if both are set, they must correspond to each other.
2567 """
2567 """
2568 if node == self.nullid:
2568 if node == self.nullid:
2569 raise error.RevlogError(
2569 raise error.RevlogError(
2570 _(b"%s: attempt to add null revision") % self.display_id
2570 _(b"%s: attempt to add null revision") % self.display_id
2571 )
2571 )
2572 if (
2572 if (
2573 node == self.nodeconstants.wdirid
2573 node == self.nodeconstants.wdirid
2574 or node in self.nodeconstants.wdirfilenodeids
2574 or node in self.nodeconstants.wdirfilenodeids
2575 ):
2575 ):
2576 raise error.RevlogError(
2576 raise error.RevlogError(
2577 _(b"%s: attempt to add wdir revision") % self.display_id
2577 _(b"%s: attempt to add wdir revision") % self.display_id
2578 )
2578 )
2579 if self._writinghandles is None:
2579 if self._writinghandles is None:
2580 msg = b'adding revision outside `revlog._writing` context'
2580 msg = b'adding revision outside `revlog._writing` context'
2581 raise error.ProgrammingError(msg)
2581 raise error.ProgrammingError(msg)
2582
2582
2583 if self._inline:
2584 fh = self._writinghandles[0]
2585 else:
2586 fh = self._writinghandles[1]
2587
2588 btext = [rawtext]
2583 btext = [rawtext]
2589
2584
2590 curr = len(self)
2585 curr = len(self)
2591 prev = curr - 1
2586 prev = curr - 1
2592
2587
2593 offset = self._get_data_offset(prev)
2588 offset = self._get_data_offset(prev)
2594
2589
2595 if self._concurrencychecker:
2590 if self._concurrencychecker:
2596 ifh, dfh, sdfh = self._writinghandles
2591 ifh, dfh, sdfh = self._writinghandles
2597 # XXX no checking for the sidedata file
2592 # XXX no checking for the sidedata file
2598 if self._inline:
2593 if self._inline:
2599 # offset is "as if" it were in the .d file, so we need to add on
2594 # offset is "as if" it were in the .d file, so we need to add on
2600 # the size of the entry metadata.
2595 # the size of the entry metadata.
2601 self._concurrencychecker(
2596 self._concurrencychecker(
2602 ifh, self._indexfile, offset + curr * self.index.entry_size
2597 ifh, self._indexfile, offset + curr * self.index.entry_size
2603 )
2598 )
2604 else:
2599 else:
2605 # Entries in the .i are a consistent size.
2600 # Entries in the .i are a consistent size.
2606 self._concurrencychecker(
2601 self._concurrencychecker(
2607 ifh, self._indexfile, curr * self.index.entry_size
2602 ifh, self._indexfile, curr * self.index.entry_size
2608 )
2603 )
2609 self._concurrencychecker(dfh, self._datafile, offset)
2604 self._concurrencychecker(dfh, self._datafile, offset)
2610
2605
2611 p1r, p2r = self.rev(p1), self.rev(p2)
2606 p1r, p2r = self.rev(p1), self.rev(p2)
2612
2607
2613 # full versions are inserted when the needed deltas
2608 # full versions are inserted when the needed deltas
2614 # become comparable to the uncompressed text
2609 # become comparable to the uncompressed text
2615 if rawtext is None:
2610 if rawtext is None:
2616 # need rawtext size, before changed by flag processors, which is
2611 # need rawtext size, before changed by flag processors, which is
2617 # the non-raw size. use revlog explicitly to avoid filelog's extra
2612 # the non-raw size. use revlog explicitly to avoid filelog's extra
2618 # logic that might remove metadata size.
2613 # logic that might remove metadata size.
2619 textlen = mdiff.patchedsize(
2614 textlen = mdiff.patchedsize(
2620 revlog.size(self, cachedelta[0]), cachedelta[1]
2615 revlog.size(self, cachedelta[0]), cachedelta[1]
2621 )
2616 )
2622 else:
2617 else:
2623 textlen = len(rawtext)
2618 textlen = len(rawtext)
2624
2619
2625 if deltacomputer is None:
2620 if deltacomputer is None:
2626 write_debug = None
2621 write_debug = None
2627 if self._debug_delta:
2622 if self._debug_delta:
2628 write_debug = transaction._report
2623 write_debug = transaction._report
2629 deltacomputer = deltautil.deltacomputer(
2624 deltacomputer = deltautil.deltacomputer(
2630 self, write_debug=write_debug
2625 self, write_debug=write_debug
2631 )
2626 )
2632
2627
2633 if cachedelta is not None and len(cachedelta) == 2:
2628 if cachedelta is not None and len(cachedelta) == 2:
2634 # If the cached delta has no information about how it should be
2629 # If the cached delta has no information about how it should be
2635 # reused, add the default reuse instruction according to the
2630 # reused, add the default reuse instruction according to the
2636 # revlog's configuration.
2631 # revlog's configuration.
2637 if self._generaldelta and self._lazydeltabase:
2632 if self._generaldelta and self._lazydeltabase:
2638 delta_base_reuse = DELTA_BASE_REUSE_TRY
2633 delta_base_reuse = DELTA_BASE_REUSE_TRY
2639 else:
2634 else:
2640 delta_base_reuse = DELTA_BASE_REUSE_NO
2635 delta_base_reuse = DELTA_BASE_REUSE_NO
2641 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2636 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2642
2637
2643 revinfo = revlogutils.revisioninfo(
2638 revinfo = revlogutils.revisioninfo(
2644 node,
2639 node,
2645 p1,
2640 p1,
2646 p2,
2641 p2,
2647 btext,
2642 btext,
2648 textlen,
2643 textlen,
2649 cachedelta,
2644 cachedelta,
2650 flags,
2645 flags,
2651 )
2646 )
2652
2647
2653 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2648 deltainfo = deltacomputer.finddeltainfo(revinfo)
2654
2649
2655 compression_mode = COMP_MODE_INLINE
2650 compression_mode = COMP_MODE_INLINE
2656 if self._docket is not None:
2651 if self._docket is not None:
2657 default_comp = self._docket.default_compression_header
2652 default_comp = self._docket.default_compression_header
2658 r = deltautil.delta_compression(default_comp, deltainfo)
2653 r = deltautil.delta_compression(default_comp, deltainfo)
2659 compression_mode, deltainfo = r
2654 compression_mode, deltainfo = r
2660
2655
2661 sidedata_compression_mode = COMP_MODE_INLINE
2656 sidedata_compression_mode = COMP_MODE_INLINE
2662 if sidedata and self.hassidedata:
2657 if sidedata and self.hassidedata:
2663 sidedata_compression_mode = COMP_MODE_PLAIN
2658 sidedata_compression_mode = COMP_MODE_PLAIN
2664 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2659 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2665 sidedata_offset = self._docket.sidedata_end
2660 sidedata_offset = self._docket.sidedata_end
2666 h, comp_sidedata = self.compress(serialized_sidedata)
2661 h, comp_sidedata = self.compress(serialized_sidedata)
2667 if (
2662 if (
2668 h != b'u'
2663 h != b'u'
2669 and comp_sidedata[0:1] != b'\0'
2664 and comp_sidedata[0:1] != b'\0'
2670 and len(comp_sidedata) < len(serialized_sidedata)
2665 and len(comp_sidedata) < len(serialized_sidedata)
2671 ):
2666 ):
2672 assert not h
2667 assert not h
2673 if (
2668 if (
2674 comp_sidedata[0:1]
2669 comp_sidedata[0:1]
2675 == self._docket.default_compression_header
2670 == self._docket.default_compression_header
2676 ):
2671 ):
2677 sidedata_compression_mode = COMP_MODE_DEFAULT
2672 sidedata_compression_mode = COMP_MODE_DEFAULT
2678 serialized_sidedata = comp_sidedata
2673 serialized_sidedata = comp_sidedata
2679 else:
2674 else:
2680 sidedata_compression_mode = COMP_MODE_INLINE
2675 sidedata_compression_mode = COMP_MODE_INLINE
2681 serialized_sidedata = comp_sidedata
2676 serialized_sidedata = comp_sidedata
2682 else:
2677 else:
2683 serialized_sidedata = b""
2678 serialized_sidedata = b""
2684 # Don't store the offset if the sidedata is empty, that way
2679 # Don't store the offset if the sidedata is empty, that way
2685 # we can easily detect empty sidedata and they will be no different
2680 # we can easily detect empty sidedata and they will be no different
2686 # than ones we manually add.
2681 # than ones we manually add.
2687 sidedata_offset = 0
2682 sidedata_offset = 0
2688
2683
2689 rank = RANK_UNKNOWN
2684 rank = RANK_UNKNOWN
2690 if self._compute_rank:
2685 if self._compute_rank:
2691 if (p1r, p2r) == (nullrev, nullrev):
2686 if (p1r, p2r) == (nullrev, nullrev):
2692 rank = 1
2687 rank = 1
2693 elif p1r != nullrev and p2r == nullrev:
2688 elif p1r != nullrev and p2r == nullrev:
2694 rank = 1 + self.fast_rank(p1r)
2689 rank = 1 + self.fast_rank(p1r)
2695 elif p1r == nullrev and p2r != nullrev:
2690 elif p1r == nullrev and p2r != nullrev:
2696 rank = 1 + self.fast_rank(p2r)
2691 rank = 1 + self.fast_rank(p2r)
2697 else: # merge node
2692 else: # merge node
2698 if rustdagop is not None and self.index.rust_ext_compat:
2693 if rustdagop is not None and self.index.rust_ext_compat:
2699 rank = rustdagop.rank(self.index, p1r, p2r)
2694 rank = rustdagop.rank(self.index, p1r, p2r)
2700 else:
2695 else:
2701 pmin, pmax = sorted((p1r, p2r))
2696 pmin, pmax = sorted((p1r, p2r))
2702 rank = 1 + self.fast_rank(pmax)
2697 rank = 1 + self.fast_rank(pmax)
2703 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2698 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2704
2699
2705 e = revlogutils.entry(
2700 e = revlogutils.entry(
2706 flags=flags,
2701 flags=flags,
2707 data_offset=offset,
2702 data_offset=offset,
2708 data_compressed_length=deltainfo.deltalen,
2703 data_compressed_length=deltainfo.deltalen,
2709 data_uncompressed_length=textlen,
2704 data_uncompressed_length=textlen,
2710 data_compression_mode=compression_mode,
2705 data_compression_mode=compression_mode,
2711 data_delta_base=deltainfo.base,
2706 data_delta_base=deltainfo.base,
2712 link_rev=link,
2707 link_rev=link,
2713 parent_rev_1=p1r,
2708 parent_rev_1=p1r,
2714 parent_rev_2=p2r,
2709 parent_rev_2=p2r,
2715 node_id=node,
2710 node_id=node,
2716 sidedata_offset=sidedata_offset,
2711 sidedata_offset=sidedata_offset,
2717 sidedata_compressed_length=len(serialized_sidedata),
2712 sidedata_compressed_length=len(serialized_sidedata),
2718 sidedata_compression_mode=sidedata_compression_mode,
2713 sidedata_compression_mode=sidedata_compression_mode,
2719 rank=rank,
2714 rank=rank,
2720 )
2715 )
2721
2716
2722 self.index.append(e)
2717 self.index.append(e)
2723 entry = self.index.entry_binary(curr)
2718 entry = self.index.entry_binary(curr)
2724 if curr == 0 and self._docket is None:
2719 if curr == 0 and self._docket is None:
2725 header = self._format_flags | self._format_version
2720 header = self._format_flags | self._format_version
2726 header = self.index.pack_header(header)
2721 header = self.index.pack_header(header)
2727 entry = header + entry
2722 entry = header + entry
2728 self._writeentry(
2723 self._writeentry(
2729 transaction,
2724 transaction,
2730 entry,
2725 entry,
2731 deltainfo.data,
2726 deltainfo.data,
2732 link,
2727 link,
2733 offset,
2728 offset,
2734 serialized_sidedata,
2729 serialized_sidedata,
2735 sidedata_offset,
2730 sidedata_offset,
2736 )
2731 )
2737
2732
2738 rawtext = btext[0]
2733 rawtext = btext[0]
2739
2734
2740 if alwayscache and rawtext is None:
2735 if alwayscache and rawtext is None:
2741 rawtext = deltacomputer.buildtext(revinfo, fh)
2736 rawtext = deltacomputer.buildtext(revinfo)
2742
2737
2743 if type(rawtext) == bytes: # only accept immutable objects
2738 if type(rawtext) == bytes: # only accept immutable objects
2744 self._revisioncache = (node, curr, rawtext)
2739 self._revisioncache = (node, curr, rawtext)
2745 self._chainbasecache[curr] = deltainfo.chainbase
2740 self._chainbasecache[curr] = deltainfo.chainbase
2746 return curr
2741 return curr
2747
2742
2748 def _get_data_offset(self, prev):
2743 def _get_data_offset(self, prev):
2749 """Returns the current offset in the (in-transaction) data file.
2744 """Returns the current offset in the (in-transaction) data file.
2750 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2745 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2751 file to store that information: since sidedata can be rewritten to the
2746 file to store that information: since sidedata can be rewritten to the
2752 end of the data file within a transaction, you can have cases where, for
2747 end of the data file within a transaction, you can have cases where, for
2753 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2748 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2754 to `n - 1`'s sidedata being written after `n`'s data.
2749 to `n - 1`'s sidedata being written after `n`'s data.
2755
2750
2756 TODO cache this in a docket file before getting out of experimental."""
2751 TODO cache this in a docket file before getting out of experimental."""
2757 if self._docket is None:
2752 if self._docket is None:
2758 return self.end(prev)
2753 return self.end(prev)
2759 else:
2754 else:
2760 return self._docket.data_end
2755 return self._docket.data_end
2761
2756
2762 def _writeentry(
2757 def _writeentry(
2763 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2758 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2764 ):
2759 ):
2765 # Files opened in a+ mode have inconsistent behavior on various
2760 # Files opened in a+ mode have inconsistent behavior on various
2766 # platforms. Windows requires that a file positioning call be made
2761 # platforms. Windows requires that a file positioning call be made
2767 # when the file handle transitions between reads and writes. See
2762 # when the file handle transitions between reads and writes. See
2768 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2763 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2769 # platforms, Python or the platform itself can be buggy. Some versions
2764 # platforms, Python or the platform itself can be buggy. Some versions
2770 # of Solaris have been observed to not append at the end of the file
2765 # of Solaris have been observed to not append at the end of the file
2771 # if the file was seeked to before the end. See issue4943 for more.
2766 # if the file was seeked to before the end. See issue4943 for more.
2772 #
2767 #
2773 # We work around this issue by inserting a seek() before writing.
2768 # We work around this issue by inserting a seek() before writing.
2774 # Note: This is likely not necessary on Python 3. However, because
2769 # Note: This is likely not necessary on Python 3. However, because
2775 # the file handle is reused for reads and may be seeked there, we need
2770 # the file handle is reused for reads and may be seeked there, we need
2776 # to be careful before changing this.
2771 # to be careful before changing this.
2777 if self._writinghandles is None:
2772 if self._writinghandles is None:
2778 msg = b'adding revision outside `revlog._writing` context'
2773 msg = b'adding revision outside `revlog._writing` context'
2779 raise error.ProgrammingError(msg)
2774 raise error.ProgrammingError(msg)
2780 ifh, dfh, sdfh = self._writinghandles
2775 ifh, dfh, sdfh = self._writinghandles
2781 if self._docket is None:
2776 if self._docket is None:
2782 ifh.seek(0, os.SEEK_END)
2777 ifh.seek(0, os.SEEK_END)
2783 else:
2778 else:
2784 ifh.seek(self._docket.index_end, os.SEEK_SET)
2779 ifh.seek(self._docket.index_end, os.SEEK_SET)
2785 if dfh:
2780 if dfh:
2786 if self._docket is None:
2781 if self._docket is None:
2787 dfh.seek(0, os.SEEK_END)
2782 dfh.seek(0, os.SEEK_END)
2788 else:
2783 else:
2789 dfh.seek(self._docket.data_end, os.SEEK_SET)
2784 dfh.seek(self._docket.data_end, os.SEEK_SET)
2790 if sdfh:
2785 if sdfh:
2791 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2786 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2792
2787
2793 curr = len(self) - 1
2788 curr = len(self) - 1
2794 if not self._inline:
2789 if not self._inline:
2795 transaction.add(self._datafile, offset)
2790 transaction.add(self._datafile, offset)
2796 if self._sidedatafile:
2791 if self._sidedatafile:
2797 transaction.add(self._sidedatafile, sidedata_offset)
2792 transaction.add(self._sidedatafile, sidedata_offset)
2798 transaction.add(self._indexfile, curr * len(entry))
2793 transaction.add(self._indexfile, curr * len(entry))
2799 if data[0]:
2794 if data[0]:
2800 dfh.write(data[0])
2795 dfh.write(data[0])
2801 dfh.write(data[1])
2796 dfh.write(data[1])
2802 if sidedata:
2797 if sidedata:
2803 sdfh.write(sidedata)
2798 sdfh.write(sidedata)
2804 ifh.write(entry)
2799 ifh.write(entry)
2805 else:
2800 else:
2806 offset += curr * self.index.entry_size
2801 offset += curr * self.index.entry_size
2807 transaction.add(self._indexfile, offset)
2802 transaction.add(self._indexfile, offset)
2808 ifh.write(entry)
2803 ifh.write(entry)
2809 ifh.write(data[0])
2804 ifh.write(data[0])
2810 ifh.write(data[1])
2805 ifh.write(data[1])
2811 assert not sidedata
2806 assert not sidedata
2812 self._enforceinlinesize(transaction)
2807 self._enforceinlinesize(transaction)
2813 if self._docket is not None:
2808 if self._docket is not None:
2814 # revlog-v2 always has 3 writing handles, help Pytype
2809 # revlog-v2 always has 3 writing handles, help Pytype
2815 wh1 = self._writinghandles[0]
2810 wh1 = self._writinghandles[0]
2816 wh2 = self._writinghandles[1]
2811 wh2 = self._writinghandles[1]
2817 wh3 = self._writinghandles[2]
2812 wh3 = self._writinghandles[2]
2818 assert wh1 is not None
2813 assert wh1 is not None
2819 assert wh2 is not None
2814 assert wh2 is not None
2820 assert wh3 is not None
2815 assert wh3 is not None
2821 self._docket.index_end = wh1.tell()
2816 self._docket.index_end = wh1.tell()
2822 self._docket.data_end = wh2.tell()
2817 self._docket.data_end = wh2.tell()
2823 self._docket.sidedata_end = wh3.tell()
2818 self._docket.sidedata_end = wh3.tell()
2824
2819
2825 nodemaputil.setup_persistent_nodemap(transaction, self)
2820 nodemaputil.setup_persistent_nodemap(transaction, self)
2826
2821
2827 def addgroup(
2822 def addgroup(
2828 self,
2823 self,
2829 deltas,
2824 deltas,
2830 linkmapper,
2825 linkmapper,
2831 transaction,
2826 transaction,
2832 alwayscache=False,
2827 alwayscache=False,
2833 addrevisioncb=None,
2828 addrevisioncb=None,
2834 duplicaterevisioncb=None,
2829 duplicaterevisioncb=None,
2835 debug_info=None,
2830 debug_info=None,
2836 delta_base_reuse_policy=None,
2831 delta_base_reuse_policy=None,
2837 ):
2832 ):
2838 """
2833 """
2839 add a delta group
2834 add a delta group
2840
2835
2841 Given a set of deltas, add them to the revision log. The
2836 Given a set of deltas, add them to the revision log. The
2842 first delta is against its parent, which should be in our
2837 first delta is against its parent, which should be in our
2843 log; the rest are against the previous delta.
2838 log; the rest are against the previous delta.
2844
2839
2845 If ``addrevisioncb`` is defined, it will be called with arguments of
2840 If ``addrevisioncb`` is defined, it will be called with arguments of
2846 this revlog and the node that was added.
2841 this revlog and the node that was added.
2847 """
2842 """
2848
2843
2849 if self._adding_group:
2844 if self._adding_group:
2850 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2845 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2851
2846
2852 # read the default delta-base reuse policy from revlog config if the
2847 # read the default delta-base reuse policy from revlog config if the
2853 # group did not specify one.
2848 # group did not specify one.
2854 if delta_base_reuse_policy is None:
2849 if delta_base_reuse_policy is None:
2855 if self._generaldelta and self._lazydeltabase:
2850 if self._generaldelta and self._lazydeltabase:
2856 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
2851 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
2857 else:
2852 else:
2858 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
2853 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
2859
2854
2860 self._adding_group = True
2855 self._adding_group = True
2861 empty = True
2856 empty = True
2862 try:
2857 try:
2863 with self._writing(transaction):
2858 with self._writing(transaction):
2864 write_debug = None
2859 write_debug = None
2865 if self._debug_delta:
2860 if self._debug_delta:
2866 write_debug = transaction._report
2861 write_debug = transaction._report
2867 deltacomputer = deltautil.deltacomputer(
2862 deltacomputer = deltautil.deltacomputer(
2868 self,
2863 self,
2869 write_debug=write_debug,
2864 write_debug=write_debug,
2870 debug_info=debug_info,
2865 debug_info=debug_info,
2871 )
2866 )
2872 # loop through our set of deltas
2867 # loop through our set of deltas
2873 for data in deltas:
2868 for data in deltas:
2874 (
2869 (
2875 node,
2870 node,
2876 p1,
2871 p1,
2877 p2,
2872 p2,
2878 linknode,
2873 linknode,
2879 deltabase,
2874 deltabase,
2880 delta,
2875 delta,
2881 flags,
2876 flags,
2882 sidedata,
2877 sidedata,
2883 ) = data
2878 ) = data
2884 link = linkmapper(linknode)
2879 link = linkmapper(linknode)
2885 flags = flags or REVIDX_DEFAULT_FLAGS
2880 flags = flags or REVIDX_DEFAULT_FLAGS
2886
2881
2887 rev = self.index.get_rev(node)
2882 rev = self.index.get_rev(node)
2888 if rev is not None:
2883 if rev is not None:
2889 # this can happen if two branches make the same change
2884 # this can happen if two branches make the same change
2890 self._nodeduplicatecallback(transaction, rev)
2885 self._nodeduplicatecallback(transaction, rev)
2891 if duplicaterevisioncb:
2886 if duplicaterevisioncb:
2892 duplicaterevisioncb(self, rev)
2887 duplicaterevisioncb(self, rev)
2893 empty = False
2888 empty = False
2894 continue
2889 continue
2895
2890
2896 for p in (p1, p2):
2891 for p in (p1, p2):
2897 if not self.index.has_node(p):
2892 if not self.index.has_node(p):
2898 raise error.LookupError(
2893 raise error.LookupError(
2899 p, self.radix, _(b'unknown parent')
2894 p, self.radix, _(b'unknown parent')
2900 )
2895 )
2901
2896
2902 if not self.index.has_node(deltabase):
2897 if not self.index.has_node(deltabase):
2903 raise error.LookupError(
2898 raise error.LookupError(
2904 deltabase, self.display_id, _(b'unknown delta base')
2899 deltabase, self.display_id, _(b'unknown delta base')
2905 )
2900 )
2906
2901
2907 baserev = self.rev(deltabase)
2902 baserev = self.rev(deltabase)
2908
2903
2909 if baserev != nullrev and self.iscensored(baserev):
2904 if baserev != nullrev and self.iscensored(baserev):
2910 # if base is censored, delta must be full replacement in a
2905 # if base is censored, delta must be full replacement in a
2911 # single patch operation
2906 # single patch operation
2912 hlen = struct.calcsize(b">lll")
2907 hlen = struct.calcsize(b">lll")
2913 oldlen = self.rawsize(baserev)
2908 oldlen = self.rawsize(baserev)
2914 newlen = len(delta) - hlen
2909 newlen = len(delta) - hlen
2915 if delta[:hlen] != mdiff.replacediffheader(
2910 if delta[:hlen] != mdiff.replacediffheader(
2916 oldlen, newlen
2911 oldlen, newlen
2917 ):
2912 ):
2918 raise error.CensoredBaseError(
2913 raise error.CensoredBaseError(
2919 self.display_id, self.node(baserev)
2914 self.display_id, self.node(baserev)
2920 )
2915 )
2921
2916
2922 if not flags and self._peek_iscensored(baserev, delta):
2917 if not flags and self._peek_iscensored(baserev, delta):
2923 flags |= REVIDX_ISCENSORED
2918 flags |= REVIDX_ISCENSORED
2924
2919
2925 # We assume consumers of addrevisioncb will want to retrieve
2920 # We assume consumers of addrevisioncb will want to retrieve
2926 # the added revision, which will require a call to
2921 # the added revision, which will require a call to
2927 # revision(). revision() will fast path if there is a cache
2922 # revision(). revision() will fast path if there is a cache
2928 # hit. So, we tell _addrevision() to always cache in this case.
2923 # hit. So, we tell _addrevision() to always cache in this case.
2929 # We're only using addgroup() in the context of changegroup
2924 # We're only using addgroup() in the context of changegroup
2930 # generation so the revision data can always be handled as raw
2925 # generation so the revision data can always be handled as raw
2931 # by the flagprocessor.
2926 # by the flagprocessor.
2932 rev = self._addrevision(
2927 rev = self._addrevision(
2933 node,
2928 node,
2934 None,
2929 None,
2935 transaction,
2930 transaction,
2936 link,
2931 link,
2937 p1,
2932 p1,
2938 p2,
2933 p2,
2939 flags,
2934 flags,
2940 (baserev, delta, delta_base_reuse_policy),
2935 (baserev, delta, delta_base_reuse_policy),
2941 alwayscache=alwayscache,
2936 alwayscache=alwayscache,
2942 deltacomputer=deltacomputer,
2937 deltacomputer=deltacomputer,
2943 sidedata=sidedata,
2938 sidedata=sidedata,
2944 )
2939 )
2945
2940
2946 if addrevisioncb:
2941 if addrevisioncb:
2947 addrevisioncb(self, rev)
2942 addrevisioncb(self, rev)
2948 empty = False
2943 empty = False
2949 finally:
2944 finally:
2950 self._adding_group = False
2945 self._adding_group = False
2951 return not empty
2946 return not empty
2952
2947
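A minimal usage sketch for addgroup(), not part of the changeset above: `rl` stands for an open revlog, `tr` for a running transaction, `cl` for the changelog used to resolve link revisions, and the tuple fields are placeholders. Only the argument order and the 8-tuple shape unpacked inside addgroup() are taken from the code itself.

    def linkmapper(linknode):
        # map the linknode carried by each delta to a changelog revision
        return cl.rev(linknode)

    deltas = [
        # the same 8-tuple that addgroup() unpacks for every incoming delta
        (node, p1, p2, linknode, deltabase, delta, flags, sidedata),
    ]

    added = rl.addgroup(
        deltas,
        linkmapper,
        tr,
        addrevisioncb=lambda store, rev: None,  # called once per newly added revision
    )
    # addgroup() returns True unless the incoming group contained no delta at all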
2953 def iscensored(self, rev):
2948 def iscensored(self, rev):
2954 """Check if a file revision is censored."""
2949 """Check if a file revision is censored."""
2955 if not self._censorable:
2950 if not self._censorable:
2956 return False
2951 return False
2957
2952
2958 return self.flags(rev) & REVIDX_ISCENSORED
2953 return self.flags(rev) & REVIDX_ISCENSORED
2959
2954
2960 def _peek_iscensored(self, baserev, delta):
2955 def _peek_iscensored(self, baserev, delta):
2961 """Quickly check if a delta produces a censored revision."""
2956 """Quickly check if a delta produces a censored revision."""
2962 if not self._censorable:
2957 if not self._censorable:
2963 return False
2958 return False
2964
2959
2965 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2960 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2966
2961
2967 def getstrippoint(self, minlink):
2962 def getstrippoint(self, minlink):
2968 """find the minimum rev that must be stripped to strip the linkrev
2963 """find the minimum rev that must be stripped to strip the linkrev
2969
2964
2970 Returns a tuple containing the minimum rev and a set of all revs that
2965 Returns a tuple containing the minimum rev and a set of all revs that
2971 have linkrevs that will be broken by this strip.
2966 have linkrevs that will be broken by this strip.
2972 """
2967 """
2973 return storageutil.resolvestripinfo(
2968 return storageutil.resolvestripinfo(
2974 minlink,
2969 minlink,
2975 len(self) - 1,
2970 len(self) - 1,
2976 self.headrevs(),
2971 self.headrevs(),
2977 self.linkrev,
2972 self.linkrev,
2978 self.parentrevs,
2973 self.parentrevs,
2979 )
2974 )
2980
2975
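A hedged sketch of how getstrippoint() and strip() are meant to be combined, assuming `rl` is a revlog, `tr` an open transaction, and `minlink` the first linkrev being stripped; the early-out mirrors the check strip() performs below.

    rev, broken_linkrevs = rl.getstrippoint(minlink)
    if rev == len(rl):
        # nothing in this revlog has a linkrev >= minlink, nothing to do
        pass
    else:
        # the caller is expected to have saved any revision listed in
        # broken_linkrevs that must be re-added after the truncation
        rl.strip(minlink, tr)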
2981 def strip(self, minlink, transaction):
2976 def strip(self, minlink, transaction):
2982 """truncate the revlog on the first revision with a linkrev >= minlink
2977 """truncate the revlog on the first revision with a linkrev >= minlink
2983
2978
2984 This function is called when we're stripping revision minlink and
2979 This function is called when we're stripping revision minlink and
2985 its descendants from the repository.
2980 its descendants from the repository.
2986
2981
2987 We have to remove all revisions with linkrev >= minlink, because
2982 We have to remove all revisions with linkrev >= minlink, because
2988 the equivalent changelog revisions will be renumbered after the
2983 the equivalent changelog revisions will be renumbered after the
2989 strip.
2984 strip.
2990
2985
2991 So we truncate the revlog on the first of these revisions, and
2986 So we truncate the revlog on the first of these revisions, and
2992 trust that the caller has saved the revisions that shouldn't be
2987 trust that the caller has saved the revisions that shouldn't be
2993 removed and that it'll re-add them after this truncation.
2988 removed and that it'll re-add them after this truncation.
2994 """
2989 """
2995 if len(self) == 0:
2990 if len(self) == 0:
2996 return
2991 return
2997
2992
2998 rev, _ = self.getstrippoint(minlink)
2993 rev, _ = self.getstrippoint(minlink)
2999 if rev == len(self):
2994 if rev == len(self):
3000 return
2995 return
3001
2996
3002 # first truncate the files on disk
2997 # first truncate the files on disk
3003 data_end = self.start(rev)
2998 data_end = self.start(rev)
3004 if not self._inline:
2999 if not self._inline:
3005 transaction.add(self._datafile, data_end)
3000 transaction.add(self._datafile, data_end)
3006 end = rev * self.index.entry_size
3001 end = rev * self.index.entry_size
3007 else:
3002 else:
3008 end = data_end + (rev * self.index.entry_size)
3003 end = data_end + (rev * self.index.entry_size)
3009
3004
3010 if self._sidedatafile:
3005 if self._sidedatafile:
3011 sidedata_end = self.sidedata_cut_off(rev)
3006 sidedata_end = self.sidedata_cut_off(rev)
3012 transaction.add(self._sidedatafile, sidedata_end)
3007 transaction.add(self._sidedatafile, sidedata_end)
3013
3008
3014 transaction.add(self._indexfile, end)
3009 transaction.add(self._indexfile, end)
3015 if self._docket is not None:
3010 if self._docket is not None:
3016 # XXX we could leverage the docket while stripping. However, it is
3011 # XXX we could leverage the docket while stripping. However, it is
3017 # not powerful enough at the time of this comment
3012 # not powerful enough at the time of this comment
3018 self._docket.index_end = end
3013 self._docket.index_end = end
3019 self._docket.data_end = data_end
3014 self._docket.data_end = data_end
3020 self._docket.sidedata_end = sidedata_end
3015 self._docket.sidedata_end = sidedata_end
3021 self._docket.write(transaction, stripping=True)
3016 self._docket.write(transaction, stripping=True)
3022
3017
3023 # then reset internal state in memory to forget those revisions
3018 # then reset internal state in memory to forget those revisions
3024 self._revisioncache = None
3019 self._revisioncache = None
3025 self._chaininfocache = util.lrucachedict(500)
3020 self._chaininfocache = util.lrucachedict(500)
3026 self._segmentfile.clear_cache()
3021 self._segmentfile.clear_cache()
3027 self._segmentfile_sidedata.clear_cache()
3022 self._segmentfile_sidedata.clear_cache()
3028
3023
3029 del self.index[rev:-1]
3024 del self.index[rev:-1]
3030
3025
3031 def checksize(self):
3026 def checksize(self):
3032 """Check size of index and data files
3027 """Check size of index and data files
3033
3028
3034 return a (dd, di) tuple.
3029 return a (dd, di) tuple.
3035 - dd: extra bytes for the "data" file
3030 - dd: extra bytes for the "data" file
3036 - di: extra bytes for the "index" file
3031 - di: extra bytes for the "index" file
3037
3032
3038 A healthy revlog will return (0, 0).
3033 A healthy revlog will return (0, 0).
3039 """
3034 """
3040 expected = 0
3035 expected = 0
3041 if len(self):
3036 if len(self):
3042 expected = max(0, self.end(len(self) - 1))
3037 expected = max(0, self.end(len(self) - 1))
3043
3038
3044 try:
3039 try:
3045 with self._datafp() as f:
3040 with self._datafp() as f:
3046 f.seek(0, io.SEEK_END)
3041 f.seek(0, io.SEEK_END)
3047 actual = f.tell()
3042 actual = f.tell()
3048 dd = actual - expected
3043 dd = actual - expected
3049 except FileNotFoundError:
3044 except FileNotFoundError:
3050 dd = 0
3045 dd = 0
3051
3046
3052 try:
3047 try:
3053 f = self.opener(self._indexfile)
3048 f = self.opener(self._indexfile)
3054 f.seek(0, io.SEEK_END)
3049 f.seek(0, io.SEEK_END)
3055 actual = f.tell()
3050 actual = f.tell()
3056 f.close()
3051 f.close()
3057 s = self.index.entry_size
3052 s = self.index.entry_size
3058 i = max(0, actual // s)
3053 i = max(0, actual // s)
3059 di = actual - (i * s)
3054 di = actual - (i * s)
3060 if self._inline:
3055 if self._inline:
3061 databytes = 0
3056 databytes = 0
3062 for r in self:
3057 for r in self:
3063 databytes += max(0, self.length(r))
3058 databytes += max(0, self.length(r))
3064 dd = 0
3059 dd = 0
3065 di = actual - len(self) * s - databytes
3060 di = actual - len(self) * s - databytes
3066 except FileNotFoundError:
3061 except FileNotFoundError:
3067 di = 0
3062 di = 0
3068
3063
3069 return (dd, di)
3064 return (dd, di)
3070
3065
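A short illustrative consumer of checksize(), in the spirit of verifyintegrity() further down; `rl` and the reporting helper are assumptions, only the meaning of the (dd, di) pair comes from the docstring above.

    dd, di = rl.checksize()
    if (dd, di) != (0, 0):
        # dd extra bytes in the data file, di extra bytes in the index file;
        # a healthy revlog reports (0, 0)
        report_problem(dd, di)  # hypothetical reporting helper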
3071 def files(self):
3066 def files(self):
3072 res = [self._indexfile]
3067 res = [self._indexfile]
3073 if self._docket_file is None:
3068 if self._docket_file is None:
3074 if not self._inline:
3069 if not self._inline:
3075 res.append(self._datafile)
3070 res.append(self._datafile)
3076 else:
3071 else:
3077 res.append(self._docket_file)
3072 res.append(self._docket_file)
3078 res.extend(self._docket.old_index_filepaths(include_empty=False))
3073 res.extend(self._docket.old_index_filepaths(include_empty=False))
3079 if self._docket.data_end:
3074 if self._docket.data_end:
3080 res.append(self._datafile)
3075 res.append(self._datafile)
3081 res.extend(self._docket.old_data_filepaths(include_empty=False))
3076 res.extend(self._docket.old_data_filepaths(include_empty=False))
3082 if self._docket.sidedata_end:
3077 if self._docket.sidedata_end:
3083 res.append(self._sidedatafile)
3078 res.append(self._sidedatafile)
3084 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3079 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3085 return res
3080 return res
3086
3081
3087 def emitrevisions(
3082 def emitrevisions(
3088 self,
3083 self,
3089 nodes,
3084 nodes,
3090 nodesorder=None,
3085 nodesorder=None,
3091 revisiondata=False,
3086 revisiondata=False,
3092 assumehaveparentrevisions=False,
3087 assumehaveparentrevisions=False,
3093 deltamode=repository.CG_DELTAMODE_STD,
3088 deltamode=repository.CG_DELTAMODE_STD,
3094 sidedata_helpers=None,
3089 sidedata_helpers=None,
3095 debug_info=None,
3090 debug_info=None,
3096 ):
3091 ):
3097 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3092 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3098 raise error.ProgrammingError(
3093 raise error.ProgrammingError(
3099 b'unhandled value for nodesorder: %s' % nodesorder
3094 b'unhandled value for nodesorder: %s' % nodesorder
3100 )
3095 )
3101
3096
3102 if nodesorder is None and not self._generaldelta:
3097 if nodesorder is None and not self._generaldelta:
3103 nodesorder = b'storage'
3098 nodesorder = b'storage'
3104
3099
3105 if (
3100 if (
3106 not self._storedeltachains
3101 not self._storedeltachains
3107 and deltamode != repository.CG_DELTAMODE_PREV
3102 and deltamode != repository.CG_DELTAMODE_PREV
3108 ):
3103 ):
3109 deltamode = repository.CG_DELTAMODE_FULL
3104 deltamode = repository.CG_DELTAMODE_FULL
3110
3105
3111 return storageutil.emitrevisions(
3106 return storageutil.emitrevisions(
3112 self,
3107 self,
3113 nodes,
3108 nodes,
3114 nodesorder,
3109 nodesorder,
3115 revlogrevisiondelta,
3110 revlogrevisiondelta,
3116 deltaparentfn=self.deltaparent,
3111 deltaparentfn=self.deltaparent,
3117 candeltafn=self._candelta,
3112 candeltafn=self._candelta,
3118 rawsizefn=self.rawsize,
3113 rawsizefn=self.rawsize,
3119 revdifffn=self.revdiff,
3114 revdifffn=self.revdiff,
3120 flagsfn=self.flags,
3115 flagsfn=self.flags,
3121 deltamode=deltamode,
3116 deltamode=deltamode,
3122 revisiondata=revisiondata,
3117 revisiondata=revisiondata,
3123 assumehaveparentrevisions=assumehaveparentrevisions,
3118 assumehaveparentrevisions=assumehaveparentrevisions,
3124 sidedata_helpers=sidedata_helpers,
3119 sidedata_helpers=sidedata_helpers,
3125 debug_info=debug_info,
3120 debug_info=debug_info,
3126 )
3121 )
3127
3122
3128 DELTAREUSEALWAYS = b'always'
3123 DELTAREUSEALWAYS = b'always'
3129 DELTAREUSESAMEREVS = b'samerevs'
3124 DELTAREUSESAMEREVS = b'samerevs'
3130 DELTAREUSENEVER = b'never'
3125 DELTAREUSENEVER = b'never'
3131
3126
3132 DELTAREUSEFULLADD = b'fulladd'
3127 DELTAREUSEFULLADD = b'fulladd'
3133
3128
3134 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3129 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3135
3130
3136 def clone(
3131 def clone(
3137 self,
3132 self,
3138 tr,
3133 tr,
3139 destrevlog,
3134 destrevlog,
3140 addrevisioncb=None,
3135 addrevisioncb=None,
3141 deltareuse=DELTAREUSESAMEREVS,
3136 deltareuse=DELTAREUSESAMEREVS,
3142 forcedeltabothparents=None,
3137 forcedeltabothparents=None,
3143 sidedata_helpers=None,
3138 sidedata_helpers=None,
3144 ):
3139 ):
3145 """Copy this revlog to another, possibly with format changes.
3140 """Copy this revlog to another, possibly with format changes.
3146
3141
3147 The destination revlog will contain the same revisions and nodes.
3142 The destination revlog will contain the same revisions and nodes.
3148 However, it may not be bit-for-bit identical due to e.g. delta encoding
3143 However, it may not be bit-for-bit identical due to e.g. delta encoding
3149 differences.
3144 differences.
3150
3145
3151 The ``deltareuse`` argument controls how deltas from the existing revlog
3146 The ``deltareuse`` argument controls how deltas from the existing revlog
3152 are preserved in the destination revlog. The argument can have the
3147 are preserved in the destination revlog. The argument can have the
3153 following values:
3148 following values:
3154
3149
3155 DELTAREUSEALWAYS
3150 DELTAREUSEALWAYS
3156 Deltas will always be reused (if possible), even if the destination
3151 Deltas will always be reused (if possible), even if the destination
3157 revlog would not select the same revisions for the delta. This is the
3152 revlog would not select the same revisions for the delta. This is the
3158 fastest mode of operation.
3153 fastest mode of operation.
3159 DELTAREUSESAMEREVS
3154 DELTAREUSESAMEREVS
3160 Deltas will be reused if the destination revlog would pick the same
3155 Deltas will be reused if the destination revlog would pick the same
3161 revisions for the delta. This mode strikes a balance between speed
3156 revisions for the delta. This mode strikes a balance between speed
3162 and optimization.
3157 and optimization.
3163 DELTAREUSENEVER
3158 DELTAREUSENEVER
3164 Deltas will never be reused. This is the slowest mode of execution.
3159 Deltas will never be reused. This is the slowest mode of execution.
3165 This mode can be used to recompute deltas (e.g. if the diff/delta
3160 This mode can be used to recompute deltas (e.g. if the diff/delta
3166 algorithm changes).
3161 algorithm changes).
3167 DELTAREUSEFULLADD
3162 DELTAREUSEFULLADD
3168 Revisions will be re-added as if they were new content. This is
3163 Revisions will be re-added as if they were new content. This is
3169 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3164 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3170 e.g. large file detection and handling.
3165 e.g. large file detection and handling.
3171
3166
3172 Delta computation can be slow, so the choice of delta reuse policy can
3167 Delta computation can be slow, so the choice of delta reuse policy can
3173 significantly affect run time.
3168 significantly affect run time.
3174
3169
3175 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3170 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3176 two extremes. Deltas will be reused if they are appropriate. But if the
3171 two extremes. Deltas will be reused if they are appropriate. But if the
3177 delta could choose a better revision, it will do so. This means if you
3172 delta could choose a better revision, it will do so. This means if you
3178 are converting a non-generaldelta revlog to a generaldelta revlog,
3173 are converting a non-generaldelta revlog to a generaldelta revlog,
3179 deltas will be recomputed if the delta's parent isn't a parent of the
3174 deltas will be recomputed if the delta's parent isn't a parent of the
3180 revision.
3175 revision.
3181
3176
3182 In addition to the delta policy, the ``forcedeltabothparents``
3177 In addition to the delta policy, the ``forcedeltabothparents``
3183 argument controls whether to force computing deltas against both parents
3178 argument controls whether to force computing deltas against both parents
3184 for merges. If not set, the destination revlog's existing setting is kept.
3179 for merges. If not set, the destination revlog's existing setting is kept.
3185
3180
3186 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3181 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3187 `sidedata_helpers`.
3182 `sidedata_helpers`.
3188 """
3183 """
3189 if deltareuse not in self.DELTAREUSEALL:
3184 if deltareuse not in self.DELTAREUSEALL:
3190 raise ValueError(
3185 raise ValueError(
3191 _(b'value for deltareuse invalid: %s') % deltareuse
3186 _(b'value for deltareuse invalid: %s') % deltareuse
3192 )
3187 )
3193
3188
3194 if len(destrevlog):
3189 if len(destrevlog):
3195 raise ValueError(_(b'destination revlog is not empty'))
3190 raise ValueError(_(b'destination revlog is not empty'))
3196
3191
3197 if getattr(self, 'filteredrevs', None):
3192 if getattr(self, 'filteredrevs', None):
3198 raise ValueError(_(b'source revlog has filtered revisions'))
3193 raise ValueError(_(b'source revlog has filtered revisions'))
3199 if getattr(destrevlog, 'filteredrevs', None):
3194 if getattr(destrevlog, 'filteredrevs', None):
3200 raise ValueError(_(b'destination revlog has filtered revisions'))
3195 raise ValueError(_(b'destination revlog has filtered revisions'))
3201
3196
3202 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3197 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3203 # if possible.
3198 # if possible.
3204 oldlazydelta = destrevlog._lazydelta
3199 oldlazydelta = destrevlog._lazydelta
3205 oldlazydeltabase = destrevlog._lazydeltabase
3200 oldlazydeltabase = destrevlog._lazydeltabase
3206 oldamd = destrevlog._deltabothparents
3201 oldamd = destrevlog._deltabothparents
3207
3202
3208 try:
3203 try:
3209 if deltareuse == self.DELTAREUSEALWAYS:
3204 if deltareuse == self.DELTAREUSEALWAYS:
3210 destrevlog._lazydeltabase = True
3205 destrevlog._lazydeltabase = True
3211 destrevlog._lazydelta = True
3206 destrevlog._lazydelta = True
3212 elif deltareuse == self.DELTAREUSESAMEREVS:
3207 elif deltareuse == self.DELTAREUSESAMEREVS:
3213 destrevlog._lazydeltabase = False
3208 destrevlog._lazydeltabase = False
3214 destrevlog._lazydelta = True
3209 destrevlog._lazydelta = True
3215 elif deltareuse == self.DELTAREUSENEVER:
3210 elif deltareuse == self.DELTAREUSENEVER:
3216 destrevlog._lazydeltabase = False
3211 destrevlog._lazydeltabase = False
3217 destrevlog._lazydelta = False
3212 destrevlog._lazydelta = False
3218
3213
3219 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3214 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3220
3215
3221 with self.reading():
3216 with self.reading():
3222 self._clone(
3217 self._clone(
3223 tr,
3218 tr,
3224 destrevlog,
3219 destrevlog,
3225 addrevisioncb,
3220 addrevisioncb,
3226 deltareuse,
3221 deltareuse,
3227 forcedeltabothparents,
3222 forcedeltabothparents,
3228 sidedata_helpers,
3223 sidedata_helpers,
3229 )
3224 )
3230
3225
3231 finally:
3226 finally:
3232 destrevlog._lazydelta = oldlazydelta
3227 destrevlog._lazydelta = oldlazydelta
3233 destrevlog._lazydeltabase = oldlazydeltabase
3228 destrevlog._lazydeltabase = oldlazydeltabase
3234 destrevlog._deltabothparents = oldamd
3229 destrevlog._deltabothparents = oldamd
3235
3230
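An illustrative clone() call, assuming `src` and `dst` are compatible revlogs and `tr` an open transaction; the policy constants are the class attributes defined before clone(), and the callback signature matches the one _clone() invokes below.

    # recompute every delta in the destination (the slow but thorough policy);
    # DELTAREUSESAMEREVS, the default, or DELTAREUSEALWAYS would reuse more
    src.clone(
        tr,
        dst,
        deltareuse=src.DELTAREUSENEVER,
        addrevisioncb=lambda store, rev, node: None,
    )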
3236 def _clone(
3231 def _clone(
3237 self,
3232 self,
3238 tr,
3233 tr,
3239 destrevlog,
3234 destrevlog,
3240 addrevisioncb,
3235 addrevisioncb,
3241 deltareuse,
3236 deltareuse,
3242 forcedeltabothparents,
3237 forcedeltabothparents,
3243 sidedata_helpers,
3238 sidedata_helpers,
3244 ):
3239 ):
3245 """perform the core duty of `revlog.clone` after parameter processing"""
3240 """perform the core duty of `revlog.clone` after parameter processing"""
3246 write_debug = None
3241 write_debug = None
3247 if self._debug_delta:
3242 if self._debug_delta:
3248 write_debug = tr._report
3243 write_debug = tr._report
3249 deltacomputer = deltautil.deltacomputer(
3244 deltacomputer = deltautil.deltacomputer(
3250 destrevlog,
3245 destrevlog,
3251 write_debug=write_debug,
3246 write_debug=write_debug,
3252 )
3247 )
3253 index = self.index
3248 index = self.index
3254 for rev in self:
3249 for rev in self:
3255 entry = index[rev]
3250 entry = index[rev]
3256
3251
3257 # Some classes override linkrev to take filtered revs into
3252 # Some classes override linkrev to take filtered revs into
3258 # account. Use raw entry from index.
3253 # account. Use raw entry from index.
3259 flags = entry[0] & 0xFFFF
3254 flags = entry[0] & 0xFFFF
3260 linkrev = entry[4]
3255 linkrev = entry[4]
3261 p1 = index[entry[5]][7]
3256 p1 = index[entry[5]][7]
3262 p2 = index[entry[6]][7]
3257 p2 = index[entry[6]][7]
3263 node = entry[7]
3258 node = entry[7]
3264
3259
3265 # (Possibly) reuse the delta from the revlog if allowed and
3260 # (Possibly) reuse the delta from the revlog if allowed and
3266 # the revlog chunk is a delta.
3261 # the revlog chunk is a delta.
3267 cachedelta = None
3262 cachedelta = None
3268 rawtext = None
3263 rawtext = None
3269 if deltareuse == self.DELTAREUSEFULLADD:
3264 if deltareuse == self.DELTAREUSEFULLADD:
3270 text = self._revisiondata(rev)
3265 text = self._revisiondata(rev)
3271 sidedata = self.sidedata(rev)
3266 sidedata = self.sidedata(rev)
3272
3267
3273 if sidedata_helpers is not None:
3268 if sidedata_helpers is not None:
3274 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3269 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3275 self, sidedata_helpers, sidedata, rev
3270 self, sidedata_helpers, sidedata, rev
3276 )
3271 )
3277 flags = flags | new_flags[0] & ~new_flags[1]
3272 flags = flags | new_flags[0] & ~new_flags[1]
3278
3273
3279 destrevlog.addrevision(
3274 destrevlog.addrevision(
3280 text,
3275 text,
3281 tr,
3276 tr,
3282 linkrev,
3277 linkrev,
3283 p1,
3278 p1,
3284 p2,
3279 p2,
3285 cachedelta=cachedelta,
3280 cachedelta=cachedelta,
3286 node=node,
3281 node=node,
3287 flags=flags,
3282 flags=flags,
3288 deltacomputer=deltacomputer,
3283 deltacomputer=deltacomputer,
3289 sidedata=sidedata,
3284 sidedata=sidedata,
3290 )
3285 )
3291 else:
3286 else:
3292 if destrevlog._lazydelta:
3287 if destrevlog._lazydelta:
3293 dp = self.deltaparent(rev)
3288 dp = self.deltaparent(rev)
3294 if dp != nullrev:
3289 if dp != nullrev:
3295 cachedelta = (dp, bytes(self._chunk(rev)))
3290 cachedelta = (dp, bytes(self._chunk(rev)))
3296
3291
3297 sidedata = None
3292 sidedata = None
3298 if not cachedelta:
3293 if not cachedelta:
3299 rawtext = self._revisiondata(rev)
3294 rawtext = self._revisiondata(rev)
3300 sidedata = self.sidedata(rev)
3295 sidedata = self.sidedata(rev)
3301 if sidedata is None:
3296 if sidedata is None:
3302 sidedata = self.sidedata(rev)
3297 sidedata = self.sidedata(rev)
3303
3298
3304 if sidedata_helpers is not None:
3299 if sidedata_helpers is not None:
3305 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3300 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3306 self, sidedata_helpers, sidedata, rev
3301 self, sidedata_helpers, sidedata, rev
3307 )
3302 )
3308 flags = flags | new_flags[0] & ~new_flags[1]
3303 flags = flags | new_flags[0] & ~new_flags[1]
3309
3304
3310 with destrevlog._writing(tr):
3305 with destrevlog._writing(tr):
3311 destrevlog._addrevision(
3306 destrevlog._addrevision(
3312 node,
3307 node,
3313 rawtext,
3308 rawtext,
3314 tr,
3309 tr,
3315 linkrev,
3310 linkrev,
3316 p1,
3311 p1,
3317 p2,
3312 p2,
3318 flags,
3313 flags,
3319 cachedelta,
3314 cachedelta,
3320 deltacomputer=deltacomputer,
3315 deltacomputer=deltacomputer,
3321 sidedata=sidedata,
3316 sidedata=sidedata,
3322 )
3317 )
3323
3318
3324 if addrevisioncb:
3319 if addrevisioncb:
3325 addrevisioncb(self, rev, node)
3320 addrevisioncb(self, rev, node)
3326
3321
3327 def censorrevision(self, tr, censornode, tombstone=b''):
3322 def censorrevision(self, tr, censornode, tombstone=b''):
3328 if self._format_version == REVLOGV0:
3323 if self._format_version == REVLOGV0:
3329 raise error.RevlogError(
3324 raise error.RevlogError(
3330 _(b'cannot censor with version %d revlogs')
3325 _(b'cannot censor with version %d revlogs')
3331 % self._format_version
3326 % self._format_version
3332 )
3327 )
3333 elif self._format_version == REVLOGV1:
3328 elif self._format_version == REVLOGV1:
3334 rewrite.v1_censor(self, tr, censornode, tombstone)
3329 rewrite.v1_censor(self, tr, censornode, tombstone)
3335 else:
3330 else:
3336 rewrite.v2_censor(self, tr, censornode, tombstone)
3331 rewrite.v2_censor(self, tr, censornode, tombstone)
3337
3332
3338 def verifyintegrity(self, state):
3333 def verifyintegrity(self, state):
3339 """Verifies the integrity of the revlog.
3334 """Verifies the integrity of the revlog.
3340
3335
3341 Yields ``revlogproblem`` instances describing problems that are
3336 Yields ``revlogproblem`` instances describing problems that are
3342 found.
3337 found.
3343 """
3338 """
3344 dd, di = self.checksize()
3339 dd, di = self.checksize()
3345 if dd:
3340 if dd:
3346 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3341 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3347 if di:
3342 if di:
3348 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3343 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3349
3344
3350 version = self._format_version
3345 version = self._format_version
3351
3346
3352 # The verifier tells us what version revlog we should be.
3347 # The verifier tells us what version revlog we should be.
3353 if version != state[b'expectedversion']:
3348 if version != state[b'expectedversion']:
3354 yield revlogproblem(
3349 yield revlogproblem(
3355 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3350 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3356 % (self.display_id, version, state[b'expectedversion'])
3351 % (self.display_id, version, state[b'expectedversion'])
3357 )
3352 )
3358
3353
3359 state[b'skipread'] = set()
3354 state[b'skipread'] = set()
3360 state[b'safe_renamed'] = set()
3355 state[b'safe_renamed'] = set()
3361
3356
3362 for rev in self:
3357 for rev in self:
3363 node = self.node(rev)
3358 node = self.node(rev)
3364
3359
3365 # Verify contents. 4 cases to care about:
3360 # Verify contents. 4 cases to care about:
3366 #
3361 #
3367 # common: the most common case
3362 # common: the most common case
3368 # rename: with a rename
3363 # rename: with a rename
3369 # meta: file content starts with b'\1\n', the metadata
3364 # meta: file content starts with b'\1\n', the metadata
3370 # header defined in filelog.py, but without a rename
3365 # header defined in filelog.py, but without a rename
3371 # ext: content stored externally
3366 # ext: content stored externally
3372 #
3367 #
3373 # More formally, their differences are shown below:
3368 # More formally, their differences are shown below:
3374 #
3369 #
3375 # | common | rename | meta | ext
3370 # | common | rename | meta | ext
3376 # -------------------------------------------------------
3371 # -------------------------------------------------------
3377 # flags() | 0 | 0 | 0 | not 0
3372 # flags() | 0 | 0 | 0 | not 0
3378 # renamed() | False | True | False | ?
3373 # renamed() | False | True | False | ?
3379 # rawtext[0:2]=='\1\n'| False | True | True | ?
3374 # rawtext[0:2]=='\1\n'| False | True | True | ?
3380 #
3375 #
3381 # "rawtext" means the raw text stored in revlog data, which
3376 # "rawtext" means the raw text stored in revlog data, which
3382 # could be retrieved by "rawdata(rev)". "text"
3377 # could be retrieved by "rawdata(rev)". "text"
3383 # mentioned below is "revision(rev)".
3378 # mentioned below is "revision(rev)".
3384 #
3379 #
3385 # There are 3 different lengths stored physically:
3380 # There are 3 different lengths stored physically:
3386 # 1. L1: rawsize, stored in revlog index
3381 # 1. L1: rawsize, stored in revlog index
3387 # 2. L2: len(rawtext), stored in revlog data
3382 # 2. L2: len(rawtext), stored in revlog data
3388 # 3. L3: len(text), stored in revlog data if flags==0, or
3383 # 3. L3: len(text), stored in revlog data if flags==0, or
3389 # possibly somewhere else if flags!=0
3384 # possibly somewhere else if flags!=0
3390 #
3385 #
3391 # L1 should be equal to L2. L3 could be different from them.
3386 # L1 should be equal to L2. L3 could be different from them.
3392 # "text" may or may not affect commit hash depending on flag
3387 # "text" may or may not affect commit hash depending on flag
3393 # processors (see flagutil.addflagprocessor).
3388 # processors (see flagutil.addflagprocessor).
3394 #
3389 #
3395 # | common | rename | meta | ext
3390 # | common | rename | meta | ext
3396 # -------------------------------------------------
3391 # -------------------------------------------------
3397 # rawsize() | L1 | L1 | L1 | L1
3392 # rawsize() | L1 | L1 | L1 | L1
3398 # size() | L1 | L2-LM | L1(*) | L1 (?)
3393 # size() | L1 | L2-LM | L1(*) | L1 (?)
3399 # len(rawtext) | L2 | L2 | L2 | L2
3394 # len(rawtext) | L2 | L2 | L2 | L2
3400 # len(text) | L2 | L2 | L2 | L3
3395 # len(text) | L2 | L2 | L2 | L3
3401 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3396 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3402 #
3397 #
3403 # LM: length of metadata, depending on rawtext
3398 # LM: length of metadata, depending on rawtext
3404 # (*): not ideal, see comment in filelog.size
3399 # (*): not ideal, see comment in filelog.size
3405 # (?): could be "- len(meta)" if the resolved content has
3400 # (?): could be "- len(meta)" if the resolved content has
3406 # rename metadata
3401 # rename metadata
3407 #
3402 #
3408 # Checks needed to be done:
3403 # Checks needed to be done:
3409 # 1. length check: L1 == L2, in all cases.
3404 # 1. length check: L1 == L2, in all cases.
3410 # 2. hash check: depending on flag processor, we may need to
3405 # 2. hash check: depending on flag processor, we may need to
3411 # use either "text" (external), or "rawtext" (in revlog).
3406 # use either "text" (external), or "rawtext" (in revlog).
3412
3407
3413 try:
3408 try:
3414 skipflags = state.get(b'skipflags', 0)
3409 skipflags = state.get(b'skipflags', 0)
3415 if skipflags:
3410 if skipflags:
3416 skipflags &= self.flags(rev)
3411 skipflags &= self.flags(rev)
3417
3412
3418 _verify_revision(self, skipflags, state, node)
3413 _verify_revision(self, skipflags, state, node)
3419
3414
3420 l1 = self.rawsize(rev)
3415 l1 = self.rawsize(rev)
3421 l2 = len(self.rawdata(node))
3416 l2 = len(self.rawdata(node))
3422
3417
3423 if l1 != l2:
3418 if l1 != l2:
3424 yield revlogproblem(
3419 yield revlogproblem(
3425 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3420 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3426 node=node,
3421 node=node,
3427 )
3422 )
3428
3423
3429 except error.CensoredNodeError:
3424 except error.CensoredNodeError:
3430 if state[b'erroroncensored']:
3425 if state[b'erroroncensored']:
3431 yield revlogproblem(
3426 yield revlogproblem(
3432 error=_(b'censored file data'), node=node
3427 error=_(b'censored file data'), node=node
3433 )
3428 )
3434 state[b'skipread'].add(node)
3429 state[b'skipread'].add(node)
3435 except Exception as e:
3430 except Exception as e:
3436 yield revlogproblem(
3431 yield revlogproblem(
3437 error=_(b'unpacking %s: %s')
3432 error=_(b'unpacking %s: %s')
3438 % (short(node), stringutil.forcebytestr(e)),
3433 % (short(node), stringutil.forcebytestr(e)),
3439 node=node,
3434 node=node,
3440 )
3435 )
3441 state[b'skipread'].add(node)
3436 state[b'skipread'].add(node)
3442
3437
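A hedged sketch of driving verifyintegrity(); the state keys shown are the ones the method reads above, while `rl` and the `handle()` callback are assumptions for illustration (the real verify machinery normally builds this dict).

    state = {
        b'expectedversion': rl._format_version,  # assumption: verify against our own version
        b'erroroncensored': True,
    }
    for problem in rl.verifyintegrity(state):
        # each problem carries an error or warning message and, when known,
        # the offending node
        handle(problem)  # hypothetical reporting callback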
3443 def storageinfo(
3438 def storageinfo(
3444 self,
3439 self,
3445 exclusivefiles=False,
3440 exclusivefiles=False,
3446 sharedfiles=False,
3441 sharedfiles=False,
3447 revisionscount=False,
3442 revisionscount=False,
3448 trackedsize=False,
3443 trackedsize=False,
3449 storedsize=False,
3444 storedsize=False,
3450 ):
3445 ):
3451 d = {}
3446 d = {}
3452
3447
3453 if exclusivefiles:
3448 if exclusivefiles:
3454 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3449 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3455 if not self._inline:
3450 if not self._inline:
3456 d[b'exclusivefiles'].append((self.opener, self._datafile))
3451 d[b'exclusivefiles'].append((self.opener, self._datafile))
3457
3452
3458 if sharedfiles:
3453 if sharedfiles:
3459 d[b'sharedfiles'] = []
3454 d[b'sharedfiles'] = []
3460
3455
3461 if revisionscount:
3456 if revisionscount:
3462 d[b'revisionscount'] = len(self)
3457 d[b'revisionscount'] = len(self)
3463
3458
3464 if trackedsize:
3459 if trackedsize:
3465 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3460 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3466
3461
3467 if storedsize:
3462 if storedsize:
3468 d[b'storedsize'] = sum(
3463 d[b'storedsize'] = sum(
3469 self.opener.stat(path).st_size for path in self.files()
3464 self.opener.stat(path).st_size for path in self.files()
3470 )
3465 )
3471
3466
3472 return d
3467 return d
3473
3468
3474 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3469 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3475 if not self.hassidedata:
3470 if not self.hassidedata:
3476 return
3471 return
3477 # revlog formats with sidedata support do not support inline
3472 # revlog formats with sidedata support do not support inline
3478 assert not self._inline
3473 assert not self._inline
3479 if not helpers[1] and not helpers[2]:
3474 if not helpers[1] and not helpers[2]:
3480 # Nothing to generate or remove
3475 # Nothing to generate or remove
3481 return
3476 return
3482
3477
3483 new_entries = []
3478 new_entries = []
3484 # append the new sidedata
3479 # append the new sidedata
3485 with self._writing(transaction):
3480 with self._writing(transaction):
3486 ifh, dfh, sdfh = self._writinghandles
3481 ifh, dfh, sdfh = self._writinghandles
3487 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3482 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3488
3483
3489 current_offset = sdfh.tell()
3484 current_offset = sdfh.tell()
3490 for rev in range(startrev, endrev + 1):
3485 for rev in range(startrev, endrev + 1):
3491 entry = self.index[rev]
3486 entry = self.index[rev]
3492 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3487 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3493 store=self,
3488 store=self,
3494 sidedata_helpers=helpers,
3489 sidedata_helpers=helpers,
3495 sidedata={},
3490 sidedata={},
3496 rev=rev,
3491 rev=rev,
3497 )
3492 )
3498
3493
3499 serialized_sidedata = sidedatautil.serialize_sidedata(
3494 serialized_sidedata = sidedatautil.serialize_sidedata(
3500 new_sidedata
3495 new_sidedata
3501 )
3496 )
3502
3497
3503 sidedata_compression_mode = COMP_MODE_INLINE
3498 sidedata_compression_mode = COMP_MODE_INLINE
3504 if serialized_sidedata and self.hassidedata:
3499 if serialized_sidedata and self.hassidedata:
3505 sidedata_compression_mode = COMP_MODE_PLAIN
3500 sidedata_compression_mode = COMP_MODE_PLAIN
3506 h, comp_sidedata = self.compress(serialized_sidedata)
3501 h, comp_sidedata = self.compress(serialized_sidedata)
3507 if (
3502 if (
3508 h != b'u'
3503 h != b'u'
3509 and comp_sidedata[0] != b'\0'
3504 and comp_sidedata[0] != b'\0'
3510 and len(comp_sidedata) < len(serialized_sidedata)
3505 and len(comp_sidedata) < len(serialized_sidedata)
3511 ):
3506 ):
3512 assert not h
3507 assert not h
3513 if (
3508 if (
3514 comp_sidedata[0]
3509 comp_sidedata[0]
3515 == self._docket.default_compression_header
3510 == self._docket.default_compression_header
3516 ):
3511 ):
3517 sidedata_compression_mode = COMP_MODE_DEFAULT
3512 sidedata_compression_mode = COMP_MODE_DEFAULT
3518 serialized_sidedata = comp_sidedata
3513 serialized_sidedata = comp_sidedata
3519 else:
3514 else:
3520 sidedata_compression_mode = COMP_MODE_INLINE
3515 sidedata_compression_mode = COMP_MODE_INLINE
3521 serialized_sidedata = comp_sidedata
3516 serialized_sidedata = comp_sidedata
3522 if entry[8] != 0 or entry[9] != 0:
3517 if entry[8] != 0 or entry[9] != 0:
3523 # rewriting entries that already have sidedata is not
3518 # rewriting entries that already have sidedata is not
3524 # supported yet, because it introduces garbage data in the
3519 # supported yet, because it introduces garbage data in the
3525 # revlog.
3520 # revlog.
3526 msg = b"rewriting existing sidedata is not supported yet"
3521 msg = b"rewriting existing sidedata is not supported yet"
3527 raise error.Abort(msg)
3522 raise error.Abort(msg)
3528
3523
3529 # Apply (potential) flags to add and to remove after running
3524 # Apply (potential) flags to add and to remove after running
3530 # the sidedata helpers
3525 # the sidedata helpers
3531 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3526 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3532 entry_update = (
3527 entry_update = (
3533 current_offset,
3528 current_offset,
3534 len(serialized_sidedata),
3529 len(serialized_sidedata),
3535 new_offset_flags,
3530 new_offset_flags,
3536 sidedata_compression_mode,
3531 sidedata_compression_mode,
3537 )
3532 )
3538
3533
3539 # the sidedata computation might have moved the file cursors around
3534 # the sidedata computation might have moved the file cursors around
3540 sdfh.seek(current_offset, os.SEEK_SET)
3535 sdfh.seek(current_offset, os.SEEK_SET)
3541 sdfh.write(serialized_sidedata)
3536 sdfh.write(serialized_sidedata)
3542 new_entries.append(entry_update)
3537 new_entries.append(entry_update)
3543 current_offset += len(serialized_sidedata)
3538 current_offset += len(serialized_sidedata)
3544 self._docket.sidedata_end = sdfh.tell()
3539 self._docket.sidedata_end = sdfh.tell()
3545
3540
3546 # rewrite the new index entries
3541 # rewrite the new index entries
3547 ifh.seek(startrev * self.index.entry_size)
3542 ifh.seek(startrev * self.index.entry_size)
3548 for i, e in enumerate(new_entries):
3543 for i, e in enumerate(new_entries):
3549 rev = startrev + i
3544 rev = startrev + i
3550 self.index.replace_sidedata_info(rev, *e)
3545 self.index.replace_sidedata_info(rev, *e)
3551 packed = self.index.entry_binary(rev)
3546 packed = self.index.entry_binary(rev)
3552 if rev == 0 and self._docket is None:
3547 if rev == 0 and self._docket is None:
3553 header = self._format_flags | self._format_version
3548 header = self._format_flags | self._format_version
3554 header = self.index.pack_header(header)
3549 header = self.index.pack_header(header)
3555 packed = header + packed
3550 packed = header + packed
3556 ifh.write(packed)
3551 ifh.write(packed)
@@ -1,1623 +1,1617 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17
17
18 from .constants import (
18 from .constants import (
19 COMP_MODE_DEFAULT,
19 COMP_MODE_DEFAULT,
20 COMP_MODE_INLINE,
20 COMP_MODE_INLINE,
21 COMP_MODE_PLAIN,
21 COMP_MODE_PLAIN,
22 DELTA_BASE_REUSE_FORCE,
22 DELTA_BASE_REUSE_FORCE,
23 DELTA_BASE_REUSE_NO,
23 DELTA_BASE_REUSE_NO,
24 KIND_CHANGELOG,
24 KIND_CHANGELOG,
25 KIND_FILELOG,
25 KIND_FILELOG,
26 KIND_MANIFESTLOG,
26 KIND_MANIFESTLOG,
27 REVIDX_ISCENSORED,
27 REVIDX_ISCENSORED,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 )
29 )
30
30
31 from ..thirdparty import attr
31 from ..thirdparty import attr
32
32
33 from .. import (
33 from .. import (
34 error,
34 error,
35 mdiff,
35 mdiff,
36 util,
36 util,
37 )
37 )
38
38
39 from . import flagutil
39 from . import flagutil
40
40
41 # maximum <delta-chain-data>/<revision-text-length> ratio
41 # maximum <delta-chain-data>/<revision-text-length> ratio
42 LIMIT_DELTA2TEXT = 2
42 LIMIT_DELTA2TEXT = 2
43
43
44
44
45 class _testrevlog:
45 class _testrevlog:
46 """minimalist fake revlog to use in doctests"""
46 """minimalist fake revlog to use in doctests"""
47
47
48 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
48 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
49 """data is an list of revision payload boundaries"""
49 """data is a list of revision payload boundaries"""
49 """data is a list of revision payload boundaries"""
50 self._data = data
51 self._srdensitythreshold = density
51 self._srdensitythreshold = density
52 self._srmingapsize = mingap
52 self._srmingapsize = mingap
53 self._snapshot = set(snapshot)
53 self._snapshot = set(snapshot)
54 self.index = None
54 self.index = None
55
55
56 def start(self, rev):
56 def start(self, rev):
57 if rev == nullrev:
57 if rev == nullrev:
58 return 0
58 return 0
59 if rev == 0:
59 if rev == 0:
60 return 0
60 return 0
61 return self._data[rev - 1]
61 return self._data[rev - 1]
62
62
63 def end(self, rev):
63 def end(self, rev):
64 if rev == nullrev:
64 if rev == nullrev:
65 return 0
65 return 0
66 return self._data[rev]
66 return self._data[rev]
67
67
68 def length(self, rev):
68 def length(self, rev):
69 return self.end(rev) - self.start(rev)
69 return self.end(rev) - self.start(rev)
70
70
71 def __len__(self):
71 def __len__(self):
72 return len(self._data)
72 return len(self._data)
73
73
74 def issnapshot(self, rev):
74 def issnapshot(self, rev):
75 if rev == nullrev:
75 if rev == nullrev:
76 return True
76 return True
77 return rev in self._snapshot
77 return rev in self._snapshot
78
78
79
79
80 def slicechunk(revlog, revs, targetsize=None):
80 def slicechunk(revlog, revs, targetsize=None):
81 """slice revs to reduce the amount of unrelated data to be read from disk.
81 """slice revs to reduce the amount of unrelated data to be read from disk.
82
82
83 ``revs`` is sliced into groups that should be read in one go.
83 ``revs`` is sliced into groups that should be read in one go.
84 Assume that revs are sorted.
84 Assume that revs are sorted.
85
85
86 The initial chunk is sliced until the overall density (payload/chunks-span
86 The initial chunk is sliced until the overall density (payload/chunks-span
87 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
87 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
88 `revlog._srmingapsize` is skipped.
88 `revlog._srmingapsize` is skipped.
89
89
90 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
90 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
91 For consistency with other slicing choices, this limit won't go lower than
91 For consistency with other slicing choices, this limit won't go lower than
92 `revlog._srmingapsize`.
92 `revlog._srmingapsize`.
93
93
94 If individual revision chunks are larger than this limit, they will still
94 If individual revision chunks are larger than this limit, they will still
95 be yielded individually.
95 be yielded individually.
96
96
97 >>> data = [
97 >>> data = [
98 ... 5, #00 (5)
98 ... 5, #00 (5)
99 ... 10, #01 (5)
99 ... 10, #01 (5)
100 ... 12, #02 (2)
100 ... 12, #02 (2)
101 ... 12, #03 (empty)
101 ... 12, #03 (empty)
102 ... 27, #04 (15)
102 ... 27, #04 (15)
103 ... 31, #05 (4)
103 ... 31, #05 (4)
104 ... 31, #06 (empty)
104 ... 31, #06 (empty)
105 ... 42, #07 (11)
105 ... 42, #07 (11)
106 ... 47, #08 (5)
106 ... 47, #08 (5)
107 ... 47, #09 (empty)
107 ... 47, #09 (empty)
108 ... 48, #10 (1)
108 ... 48, #10 (1)
109 ... 51, #11 (3)
109 ... 51, #11 (3)
110 ... 74, #12 (23)
110 ... 74, #12 (23)
111 ... 85, #13 (11)
111 ... 85, #13 (11)
112 ... 86, #14 (1)
112 ... 86, #14 (1)
113 ... 91, #15 (5)
113 ... 91, #15 (5)
114 ... ]
114 ... ]
115 >>> revlog = _testrevlog(data, snapshot=range(16))
115 >>> revlog = _testrevlog(data, snapshot=range(16))
116
116
117 >>> list(slicechunk(revlog, list(range(16))))
117 >>> list(slicechunk(revlog, list(range(16))))
118 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
118 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
119 >>> list(slicechunk(revlog, [0, 15]))
119 >>> list(slicechunk(revlog, [0, 15]))
120 [[0], [15]]
120 [[0], [15]]
121 >>> list(slicechunk(revlog, [0, 11, 15]))
121 >>> list(slicechunk(revlog, [0, 11, 15]))
122 [[0], [11], [15]]
122 [[0], [11], [15]]
123 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
123 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
124 [[0], [11, 13, 15]]
124 [[0], [11, 13, 15]]
125 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
125 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
126 [[1, 2], [5, 8, 10, 11], [14]]
126 [[1, 2], [5, 8, 10, 11], [14]]
127
127
128 Slicing with a maximum chunk size
128 Slicing with a maximum chunk size
129 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
129 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
130 [[0], [11], [13], [15]]
130 [[0], [11], [13], [15]]
131 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
131 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
132 [[0], [11], [13, 15]]
132 [[0], [11], [13, 15]]
133
133
134 Slicing involving nullrev
134 Slicing involving nullrev
135 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
135 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
136 [[-1, 0], [11], [13, 15]]
136 [[-1, 0], [11], [13, 15]]
137 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
137 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
138 [[-1], [13], [15]]
138 [[-1], [13], [15]]
139 """
139 """
140 if targetsize is not None:
140 if targetsize is not None:
141 targetsize = max(targetsize, revlog._srmingapsize)
141 targetsize = max(targetsize, revlog._srmingapsize)
142 # targetsize should not be specified when evaluating delta candidates:
142 # targetsize should not be specified when evaluating delta candidates:
143 # * targetsize is used to ensure we stay within specification when reading,
143 # * targetsize is used to ensure we stay within specification when reading,
144 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
144 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
145 if densityslicing is None:
145 if densityslicing is None:
146 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
146 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
147 for chunk in densityslicing(
147 for chunk in densityslicing(
148 revs, revlog._srdensitythreshold, revlog._srmingapsize
148 revs, revlog._srdensitythreshold, revlog._srmingapsize
149 ):
149 ):
150 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
150 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
151 yield subchunk
151 yield subchunk
152
152
153
153
154 def _slicechunktosize(revlog, revs, targetsize=None):
154 def _slicechunktosize(revlog, revs, targetsize=None):
155 """slice revs to match the target size
155 """slice revs to match the target size
156
156
157 This is intended to be used on chunks that density slicing selected but that
157 This is intended to be used on chunks that density slicing selected but that
158 are still too large compared to the read guarantee of revlog. This might
158 are still too large compared to the read guarantee of revlog. This might
159 happen when "minimal gap size" interrupted the slicing or when chains are
159 happen when "minimal gap size" interrupted the slicing or when chains are
160 built in a way that creates large blocks next to each other.
160 built in a way that creates large blocks next to each other.
161
161
162 >>> data = [
162 >>> data = [
163 ... 3, #0 (3)
163 ... 3, #0 (3)
164 ... 5, #1 (2)
164 ... 5, #1 (2)
165 ... 6, #2 (1)
165 ... 6, #2 (1)
166 ... 8, #3 (2)
166 ... 8, #3 (2)
167 ... 8, #4 (empty)
167 ... 8, #4 (empty)
168 ... 11, #5 (3)
168 ... 11, #5 (3)
169 ... 12, #6 (1)
169 ... 12, #6 (1)
170 ... 13, #7 (1)
170 ... 13, #7 (1)
171 ... 14, #8 (1)
171 ... 14, #8 (1)
172 ... ]
172 ... ]
173
173
174 == All snapshots cases ==
174 == All snapshots cases ==
175 >>> revlog = _testrevlog(data, snapshot=range(9))
175 >>> revlog = _testrevlog(data, snapshot=range(9))
176
176
177 Cases where chunk is already small enough
177 Cases where chunk is already small enough
178 >>> list(_slicechunktosize(revlog, [0], 3))
178 >>> list(_slicechunktosize(revlog, [0], 3))
179 [[0]]
179 [[0]]
180 >>> list(_slicechunktosize(revlog, [6, 7], 3))
180 >>> list(_slicechunktosize(revlog, [6, 7], 3))
181 [[6, 7]]
181 [[6, 7]]
182 >>> list(_slicechunktosize(revlog, [0], None))
182 >>> list(_slicechunktosize(revlog, [0], None))
183 [[0]]
183 [[0]]
184 >>> list(_slicechunktosize(revlog, [6, 7], None))
184 >>> list(_slicechunktosize(revlog, [6, 7], None))
185 [[6, 7]]
185 [[6, 7]]
186
186
187 cases where we need actual slicing
187 cases where we need actual slicing
188 >>> list(_slicechunktosize(revlog, [0, 1], 3))
188 >>> list(_slicechunktosize(revlog, [0, 1], 3))
189 [[0], [1]]
189 [[0], [1]]
190 >>> list(_slicechunktosize(revlog, [1, 3], 3))
190 >>> list(_slicechunktosize(revlog, [1, 3], 3))
191 [[1], [3]]
191 [[1], [3]]
192 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
192 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
193 [[1, 2], [3]]
193 [[1, 2], [3]]
194 >>> list(_slicechunktosize(revlog, [3, 5], 3))
194 >>> list(_slicechunktosize(revlog, [3, 5], 3))
195 [[3], [5]]
195 [[3], [5]]
196 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
196 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
197 [[3], [5]]
197 [[3], [5]]
198 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
198 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
199 [[5], [6, 7, 8]]
199 [[5], [6, 7, 8]]
200 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
200 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
201 [[0], [1, 2], [3], [5], [6, 7, 8]]
201 [[0], [1, 2], [3], [5], [6, 7, 8]]
202
202
203 Case with too large individual chunk (must return valid chunk)
203 Case with too large individual chunk (must return valid chunk)
204 >>> list(_slicechunktosize(revlog, [0, 1], 2))
204 >>> list(_slicechunktosize(revlog, [0, 1], 2))
205 [[0], [1]]
205 [[0], [1]]
206 >>> list(_slicechunktosize(revlog, [1, 3], 1))
206 >>> list(_slicechunktosize(revlog, [1, 3], 1))
207 [[1], [3]]
207 [[1], [3]]
208 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
208 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
209 [[3], [5]]
209 [[3], [5]]
210
210
211 == No Snapshot cases ==
211 == No Snapshot cases ==
212 >>> revlog = _testrevlog(data)
212 >>> revlog = _testrevlog(data)
213
213
214 Cases where chunk is already small enough
214 Cases where chunk is already small enough
215 >>> list(_slicechunktosize(revlog, [0], 3))
215 >>> list(_slicechunktosize(revlog, [0], 3))
216 [[0]]
216 [[0]]
217 >>> list(_slicechunktosize(revlog, [6, 7], 3))
217 >>> list(_slicechunktosize(revlog, [6, 7], 3))
218 [[6, 7]]
218 [[6, 7]]
219 >>> list(_slicechunktosize(revlog, [0], None))
219 >>> list(_slicechunktosize(revlog, [0], None))
220 [[0]]
220 [[0]]
221 >>> list(_slicechunktosize(revlog, [6, 7], None))
221 >>> list(_slicechunktosize(revlog, [6, 7], None))
222 [[6, 7]]
222 [[6, 7]]
223
223
224 cases where we need actual slicing
224 cases where we need actual slicing
225 >>> list(_slicechunktosize(revlog, [0, 1], 3))
225 >>> list(_slicechunktosize(revlog, [0, 1], 3))
226 [[0], [1]]
226 [[0], [1]]
227 >>> list(_slicechunktosize(revlog, [1, 3], 3))
227 >>> list(_slicechunktosize(revlog, [1, 3], 3))
228 [[1], [3]]
228 [[1], [3]]
229 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
229 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
230 [[1], [2, 3]]
230 [[1], [2, 3]]
231 >>> list(_slicechunktosize(revlog, [3, 5], 3))
231 >>> list(_slicechunktosize(revlog, [3, 5], 3))
232 [[3], [5]]
232 [[3], [5]]
233 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
233 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
234 [[3], [4, 5]]
234 [[3], [4, 5]]
235 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
235 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
236 [[5], [6, 7, 8]]
236 [[5], [6, 7, 8]]
237 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
237 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
238 [[0], [1, 2], [3], [5], [6, 7, 8]]
238 [[0], [1, 2], [3], [5], [6, 7, 8]]
239
239
240 Case with too large individual chunk (must return valid chunk)
240 Case with too large individual chunk (must return valid chunk)
241 >>> list(_slicechunktosize(revlog, [0, 1], 2))
241 >>> list(_slicechunktosize(revlog, [0, 1], 2))
242 [[0], [1]]
242 [[0], [1]]
243 >>> list(_slicechunktosize(revlog, [1, 3], 1))
243 >>> list(_slicechunktosize(revlog, [1, 3], 1))
244 [[1], [3]]
244 [[1], [3]]
245 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
245 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
246 [[3], [5]]
246 [[3], [5]]
247
247
248 == mixed case ==
248 == mixed case ==
249 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
249 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
250 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
250 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
251 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
251 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
252 """
252 """
253 assert targetsize is None or 0 <= targetsize
253 assert targetsize is None or 0 <= targetsize
254 startdata = revlog.start(revs[0])
254 startdata = revlog.start(revs[0])
255 enddata = revlog.end(revs[-1])
255 enddata = revlog.end(revs[-1])
256 fullspan = enddata - startdata
256 fullspan = enddata - startdata
257 if targetsize is None or fullspan <= targetsize:
257 if targetsize is None or fullspan <= targetsize:
258 yield revs
258 yield revs
259 return
259 return
260
260
261 startrevidx = 0
261 startrevidx = 0
262 endrevidx = 1
262 endrevidx = 1
263 iterrevs = enumerate(revs)
263 iterrevs = enumerate(revs)
264 next(iterrevs) # skip first rev.
264 next(iterrevs) # skip first rev.
265 # first step: get snapshots out of the way
265 # first step: get snapshots out of the way
266 for idx, r in iterrevs:
266 for idx, r in iterrevs:
267 span = revlog.end(r) - startdata
267 span = revlog.end(r) - startdata
268 snapshot = revlog.issnapshot(r)
268 snapshot = revlog.issnapshot(r)
269 if span <= targetsize and snapshot:
269 if span <= targetsize and snapshot:
270 endrevidx = idx + 1
270 endrevidx = idx + 1
271 else:
271 else:
272 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
272 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
273 if chunk:
273 if chunk:
274 yield chunk
274 yield chunk
275 startrevidx = idx
275 startrevidx = idx
276 startdata = revlog.start(r)
276 startdata = revlog.start(r)
277 endrevidx = idx + 1
277 endrevidx = idx + 1
278 if not snapshot:
278 if not snapshot:
279 break
279 break
280
280
281 # for the others, we use binary slicing to quickly converge toward valid
281 # for the others, we use binary slicing to quickly converge toward valid
282 # chunks (otherwise, we might end up looking for start/end of many
282 # chunks (otherwise, we might end up looking for start/end of many
283 # revisions). This logic is not looking for the perfect slicing point, it
283 # revisions). This logic is not looking for the perfect slicing point, it
284 # focuses on quickly converging toward valid chunks.
284 # focuses on quickly converging toward valid chunks.
285 nbitem = len(revs)
285 nbitem = len(revs)
286 while (enddata - startdata) > targetsize:
286 while (enddata - startdata) > targetsize:
287 endrevidx = nbitem
287 endrevidx = nbitem
288 if nbitem - startrevidx <= 1:
288 if nbitem - startrevidx <= 1:
289 break # protect against individual chunk larger than limit
289 break # protect against individual chunk larger than limit
290 localenddata = revlog.end(revs[endrevidx - 1])
290 localenddata = revlog.end(revs[endrevidx - 1])
291 span = localenddata - startdata
291 span = localenddata - startdata
292 while span > targetsize:
292 while span > targetsize:
293 if endrevidx - startrevidx <= 1:
293 if endrevidx - startrevidx <= 1:
294 break # protect against individual chunk larger than limit
294 break # protect against individual chunk larger than limit
295 endrevidx -= (endrevidx - startrevidx) // 2
295 endrevidx -= (endrevidx - startrevidx) // 2
296 localenddata = revlog.end(revs[endrevidx - 1])
296 localenddata = revlog.end(revs[endrevidx - 1])
297 span = localenddata - startdata
297 span = localenddata - startdata
298 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
298 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
299 if chunk:
299 if chunk:
300 yield chunk
300 yield chunk
301 startrevidx = endrevidx
301 startrevidx = endrevidx
302 startdata = revlog.start(revs[startrevidx])
302 startdata = revlog.start(revs[startrevidx])
303
303
304 chunk = _trimchunk(revlog, revs, startrevidx)
304 chunk = _trimchunk(revlog, revs, startrevidx)
305 if chunk:
305 if chunk:
306 yield chunk
306 yield chunk
307
307
308
308
309 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
309 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
310 """slice revs to reduce the amount of unrelated data to be read from disk.
310 """slice revs to reduce the amount of unrelated data to be read from disk.
311
311
312 ``revs`` is sliced into groups that should each be read in one go.
312 ``revs`` is sliced into groups that should each be read in one go.
313 Assume that revs are sorted.
313 Assume that revs are sorted.
314
314
315 The initial chunk is sliced until the overall density (payload/chunks-span
315 The initial chunk is sliced until the overall density (payload/chunks-span
316 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
316 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
317 skipped.
317 skipped.
318
318
319 >>> revlog = _testrevlog([
319 >>> revlog = _testrevlog([
320 ... 5, #00 (5)
320 ... 5, #00 (5)
321 ... 10, #01 (5)
321 ... 10, #01 (5)
322 ... 12, #02 (2)
322 ... 12, #02 (2)
323 ... 12, #03 (empty)
323 ... 12, #03 (empty)
324 ... 27, #04 (15)
324 ... 27, #04 (15)
325 ... 31, #05 (4)
325 ... 31, #05 (4)
326 ... 31, #06 (empty)
326 ... 31, #06 (empty)
327 ... 42, #07 (11)
327 ... 42, #07 (11)
328 ... 47, #08 (5)
328 ... 47, #08 (5)
329 ... 47, #09 (empty)
329 ... 47, #09 (empty)
330 ... 48, #10 (1)
330 ... 48, #10 (1)
331 ... 51, #11 (3)
331 ... 51, #11 (3)
332 ... 74, #12 (23)
332 ... 74, #12 (23)
333 ... 85, #13 (11)
333 ... 85, #13 (11)
334 ... 86, #14 (1)
334 ... 86, #14 (1)
335 ... 91, #15 (5)
335 ... 91, #15 (5)
336 ... ])
336 ... ])
337
337
338 >>> list(_slicechunktodensity(revlog, list(range(16))))
338 >>> list(_slicechunktodensity(revlog, list(range(16))))
339 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
339 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
340 >>> list(_slicechunktodensity(revlog, [0, 15]))
340 >>> list(_slicechunktodensity(revlog, [0, 15]))
341 [[0], [15]]
341 [[0], [15]]
342 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
342 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
343 [[0], [11], [15]]
343 [[0], [11], [15]]
344 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
344 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
345 [[0], [11, 13, 15]]
345 [[0], [11, 13, 15]]
346 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
346 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
347 [[1, 2], [5, 8, 10, 11], [14]]
347 [[1, 2], [5, 8, 10, 11], [14]]
348 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
348 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
349 ... mingapsize=20))
349 ... mingapsize=20))
350 [[1, 2, 3, 5, 8, 10, 11], [14]]
350 [[1, 2, 3, 5, 8, 10, 11], [14]]
351 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
351 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
352 ... targetdensity=0.95))
352 ... targetdensity=0.95))
353 [[1, 2], [5], [8, 10, 11], [14]]
353 [[1, 2], [5], [8, 10, 11], [14]]
354 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
354 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
355 ... targetdensity=0.95, mingapsize=12))
355 ... targetdensity=0.95, mingapsize=12))
356 [[1, 2], [5, 8, 10, 11], [14]]
356 [[1, 2], [5, 8, 10, 11], [14]]
357 """
357 """
358 start = revlog.start
358 start = revlog.start
359 length = revlog.length
359 length = revlog.length
360
360
361 if len(revs) <= 1:
361 if len(revs) <= 1:
362 yield revs
362 yield revs
363 return
363 return
364
364
365 deltachainspan = segmentspan(revlog, revs)
365 deltachainspan = segmentspan(revlog, revs)
366
366
367 if deltachainspan < mingapsize:
367 if deltachainspan < mingapsize:
368 yield revs
368 yield revs
369 return
369 return
370
370
371 readdata = deltachainspan
371 readdata = deltachainspan
372 chainpayload = sum(length(r) for r in revs)
372 chainpayload = sum(length(r) for r in revs)
373
373
374 if deltachainspan:
374 if deltachainspan:
375 density = chainpayload / float(deltachainspan)
375 density = chainpayload / float(deltachainspan)
376 else:
376 else:
377 density = 1.0
377 density = 1.0
378
378
379 if density >= targetdensity:
379 if density >= targetdensity:
380 yield revs
380 yield revs
381 return
381 return
382
382
383 # Collect the gaps; they are sorted below so the largest can be popped first
383 # Collect the gaps; they are sorted below so the largest can be popped first
384 gaps = []
384 gaps = []
385 prevend = None
385 prevend = None
386 for i, rev in enumerate(revs):
386 for i, rev in enumerate(revs):
387 revstart = start(rev)
387 revstart = start(rev)
388 revlen = length(rev)
388 revlen = length(rev)
389
389
390 # Skip empty revisions to form larger holes
390 # Skip empty revisions to form larger holes
391 if revlen == 0:
391 if revlen == 0:
392 continue
392 continue
393
393
394 if prevend is not None:
394 if prevend is not None:
395 gapsize = revstart - prevend
395 gapsize = revstart - prevend
396 # only consider holes that are large enough
396 # only consider holes that are large enough
397 if gapsize > mingapsize:
397 if gapsize > mingapsize:
398 gaps.append((gapsize, i))
398 gaps.append((gapsize, i))
399
399
400 prevend = revstart + revlen
400 prevend = revstart + revlen
401 # sort the gaps so we can pop them from largest to smallest
401 # sort the gaps so we can pop them from largest to smallest
402 gaps.sort()
402 gaps.sort()
403
403
404 # Collect the indices of the largest holes until the density is acceptable
404 # Collect the indices of the largest holes until the density is acceptable
405 selected = []
405 selected = []
406 while gaps and density < targetdensity:
406 while gaps and density < targetdensity:
407 gapsize, gapidx = gaps.pop()
407 gapsize, gapidx = gaps.pop()
408
408
409 selected.append(gapidx)
409 selected.append(gapidx)
410
410
411 # the gap is no longer part of the data we would read, so shrink the
411 # the gap is no longer part of the data we would read, so shrink the
412 # read size accordingly before recomputing the density
412 # read size accordingly before recomputing the density
413 readdata -= gapsize
413 readdata -= gapsize
414 if readdata > 0:
414 if readdata > 0:
415 density = chainpayload / float(readdata)
415 density = chainpayload / float(readdata)
416 else:
416 else:
417 density = 1.0
417 density = 1.0
418 selected.sort()
418 selected.sort()
419
419
420 # Cut the revs at collected indices
420 # Cut the revs at collected indices
421 previdx = 0
421 previdx = 0
422 for idx in selected:
422 for idx in selected:
423
423
424 chunk = _trimchunk(revlog, revs, previdx, idx)
424 chunk = _trimchunk(revlog, revs, previdx, idx)
425 if chunk:
425 if chunk:
426 yield chunk
426 yield chunk
427
427
428 previdx = idx
428 previdx = idx
429
429
430 chunk = _trimchunk(revlog, revs, previdx)
430 chunk = _trimchunk(revlog, revs, previdx)
431 if chunk:
431 if chunk:
432 yield chunk
432 yield chunk
433
433
434
434
435 def _trimchunk(revlog, revs, startidx, endidx=None):
435 def _trimchunk(revlog, revs, startidx, endidx=None):
436 """returns revs[startidx:endidx] without empty trailing revs
436 """returns revs[startidx:endidx] without empty trailing revs
437
437
438 Doctest Setup
438 Doctest Setup
439 >>> revlog = _testrevlog([
439 >>> revlog = _testrevlog([
440 ... 5, #0
440 ... 5, #0
441 ... 10, #1
441 ... 10, #1
442 ... 12, #2
442 ... 12, #2
443 ... 12, #3 (empty)
443 ... 12, #3 (empty)
444 ... 17, #4
444 ... 17, #4
445 ... 21, #5
445 ... 21, #5
446 ... 21, #6 (empty)
446 ... 21, #6 (empty)
447 ... ])
447 ... ])
448
448
449 Contiguous cases:
449 Contiguous cases:
450 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
450 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
451 [0, 1, 2, 3, 4, 5]
451 [0, 1, 2, 3, 4, 5]
452 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
452 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
453 [0, 1, 2, 3, 4]
453 [0, 1, 2, 3, 4]
454 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
454 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
455 [0, 1, 2]
455 [0, 1, 2]
456 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
456 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
457 [2]
457 [2]
458 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
458 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
459 [3, 4, 5]
459 [3, 4, 5]
460 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
460 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
461 [3, 4]
461 [3, 4]
462
462
463 Discontiguous cases:
463 Discontiguous cases:
464 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
464 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
465 [1, 3, 5]
465 [1, 3, 5]
466 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
466 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
467 [1]
467 [1]
468 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
468 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
469 [3, 5]
469 [3, 5]
470 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
470 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
471 [3, 5]
471 [3, 5]
472 """
472 """
473 length = revlog.length
473 length = revlog.length
474
474
475 if endidx is None:
475 if endidx is None:
476 endidx = len(revs)
476 endidx = len(revs)
477
477
478 # If we have a non-empty delta candidate, there is nothing to trim
478 # If we have a non-empty delta candidate, there is nothing to trim
479 if revs[endidx - 1] < len(revlog):
479 if revs[endidx - 1] < len(revlog):
480 # Trim empty revs at the end, except the very first revision of a chain
480 # Trim empty revs at the end, except the very first revision of a chain
481 while (
481 while (
482 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
482 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
483 ):
483 ):
484 endidx -= 1
484 endidx -= 1
485
485
486 return revs[startidx:endidx]
486 return revs[startidx:endidx]
487
487
488
488
489 def segmentspan(revlog, revs):
489 def segmentspan(revlog, revs):
490 """Get the byte span of a segment of revisions
490 """Get the byte span of a segment of revisions
491
491
492 revs is a sorted array of revision numbers
492 revs is a sorted array of revision numbers
493
493
494 >>> revlog = _testrevlog([
494 >>> revlog = _testrevlog([
495 ... 5, #0
495 ... 5, #0
496 ... 10, #1
496 ... 10, #1
497 ... 12, #2
497 ... 12, #2
498 ... 12, #3 (empty)
498 ... 12, #3 (empty)
499 ... 17, #4
499 ... 17, #4
500 ... ])
500 ... ])
501
501
502 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
502 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
503 17
503 17
504 >>> segmentspan(revlog, [0, 4])
504 >>> segmentspan(revlog, [0, 4])
505 17
505 17
506 >>> segmentspan(revlog, [3, 4])
506 >>> segmentspan(revlog, [3, 4])
507 5
507 5
508 >>> segmentspan(revlog, [1, 2, 3,])
508 >>> segmentspan(revlog, [1, 2, 3,])
509 7
509 7
510 >>> segmentspan(revlog, [1, 3])
510 >>> segmentspan(revlog, [1, 3])
511 7
511 7
512 """
512 """
513 if not revs:
513 if not revs:
514 return 0
514 return 0
515 end = revlog.end(revs[-1])
515 end = revlog.end(revs[-1])
516 return end - revlog.start(revs[0])
516 return end - revlog.start(revs[0])
517
517
518
518
519 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
519 def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):
520 """build full text from a (base, delta) pair and other metadata"""
520 """build full text from a (base, delta) pair and other metadata"""
521 # special case deltas which replace entire base; no need to decode
521 # special case deltas which replace entire base; no need to decode
522 # base revision. this neatly avoids censored bases, which throw when
522 # base revision. this neatly avoids censored bases, which throw when
523 # they're decoded.
523 # they're decoded.
524 hlen = struct.calcsize(b">lll")
524 hlen = struct.calcsize(b">lll")
525 if delta[:hlen] == mdiff.replacediffheader(
525 if delta[:hlen] == mdiff.replacediffheader(
526 revlog.rawsize(baserev), len(delta) - hlen
526 revlog.rawsize(baserev), len(delta) - hlen
527 ):
527 ):
528 fulltext = delta[hlen:]
528 fulltext = delta[hlen:]
529 else:
529 else:
530 # deltabase is rawtext before changed by flag processors, which is
530 # deltabase is rawtext before changed by flag processors, which is
531 # equivalent to non-raw text
531 # equivalent to non-raw text
532 basetext = revlog.revision(baserev, _df=fh)
532 basetext = revlog.revision(baserev)
533 fulltext = mdiff.patch(basetext, delta)
533 fulltext = mdiff.patch(basetext, delta)
534
534
535 try:
535 try:
536 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
536 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
537 if validatehash:
537 if validatehash:
538 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
538 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
539 if flags & REVIDX_ISCENSORED:
539 if flags & REVIDX_ISCENSORED:
540 raise error.StorageError(
540 raise error.StorageError(
541 _(b'node %s is not censored') % expectednode
541 _(b'node %s is not censored') % expectednode
542 )
542 )
543 except error.CensoredNodeError:
543 except error.CensoredNodeError:
544 # must pass the censored index flag to add censored revisions
544 # must pass the censored index flag to add censored revisions
545 if not flags & REVIDX_ISCENSORED:
545 if not flags & REVIDX_ISCENSORED:
546 raise
546 raise
547 return fulltext
547 return fulltext
548
548
549
549
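# Editor's note: a self-contained sketch of the "full replacement" fast path
# tested above. It assumes the bdiff-style delta layout in which a hunk starts
# with a big-endian ">lll" header (start, end, new-length); when a single hunk
# replaces the whole base, the bytes after the header are already the new full
# text, so the base never needs to be decoded. This mirrors, but does not use,
# mdiff.replacediffheader().
import struct as _sketch_struct

def _sketch_is_full_replacement(delta, base_size):
    hlen = _sketch_struct.calcsize(b">lll")
    expected = _sketch_struct.pack(b">lll", 0, base_size, len(delta) - hlen)
    return delta[:hlen] == expected

_sketch_base = b"old text"
_sketch_new = b"completely new text"
_sketch_delta = (
    _sketch_struct.pack(b">lll", 0, len(_sketch_base), len(_sketch_new))
    + _sketch_new
)
assert _sketch_is_full_replacement(_sketch_delta, len(_sketch_base))
# the payload after the header is already the full text; no base decoding
assert _sketch_delta[_sketch_struct.calcsize(b">lll"):] == _sketch_new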
550 @attr.s(slots=True, frozen=True)
550 @attr.s(slots=True, frozen=True)
551 class _deltainfo:
551 class _deltainfo:
552 distance = attr.ib()
552 distance = attr.ib()
553 deltalen = attr.ib()
553 deltalen = attr.ib()
554 data = attr.ib()
554 data = attr.ib()
555 base = attr.ib()
555 base = attr.ib()
556 chainbase = attr.ib()
556 chainbase = attr.ib()
557 chainlen = attr.ib()
557 chainlen = attr.ib()
558 compresseddeltalen = attr.ib()
558 compresseddeltalen = attr.ib()
559 snapshotdepth = attr.ib()
559 snapshotdepth = attr.ib()
560
560
561
561
562 def drop_u_compression(delta):
562 def drop_u_compression(delta):
563 """turn into a "u" (no-compression) into no-compression without header
563 """turn into a "u" (no-compression) into no-compression without header
564
564
565 This is useful for revlog formats that have a better compression method.
565 This is useful for revlog formats that have a better compression method.
566 """
566 """
567 assert delta.data[0] == b'u', delta.data[0]
567 assert delta.data[0] == b'u', delta.data[0]
568 return _deltainfo(
568 return _deltainfo(
569 delta.distance,
569 delta.distance,
570 delta.deltalen - 1,
570 delta.deltalen - 1,
571 (b'', delta.data[1]),
571 (b'', delta.data[1]),
572 delta.base,
572 delta.base,
573 delta.chainbase,
573 delta.chainbase,
574 delta.chainlen,
574 delta.chainlen,
575 delta.compresseddeltalen,
575 delta.compresseddeltalen,
576 delta.snapshotdepth,
576 delta.snapshotdepth,
577 )
577 )
578
578
579
579
580 def is_good_delta_info(revlog, deltainfo, revinfo):
580 def is_good_delta_info(revlog, deltainfo, revinfo):
581 """Returns True if the given delta is good. Good means that it is within
581 """Returns True if the given delta is good. Good means that it is within
582 the disk span, disk size, and chain length bounds that we know to be
582 the disk span, disk size, and chain length bounds that we know to be
583 performant."""
583 performant."""
584 if deltainfo is None:
584 if deltainfo is None:
585 return False
585 return False
586
586
587 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
587 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
588 # we should never end up asking such a question. Adding the assert as a
588 # we should never end up asking such a question. Adding the assert as a
589 # safe-guard to detect anything that would be fishy in this regard.
589 # safe-guard to detect anything that would be fishy in this regard.
590 assert (
590 assert (
591 revinfo.cachedelta is None
591 revinfo.cachedelta is None
592 or revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
592 or revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
593 or not revlog._generaldelta
593 or not revlog._generaldelta
594 )
594 )
595
595
596 # - 'deltainfo.distance' is the distance from the base revision --
596 # - 'deltainfo.distance' is the distance from the base revision --
597 # bounding it limits the amount of I/O we need to do.
597 # bounding it limits the amount of I/O we need to do.
598 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
598 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
599 # deltas we need to apply -- bounding it limits the amount of CPU
599 # deltas we need to apply -- bounding it limits the amount of CPU
600 # we consume.
600 # we consume.
601
601
602 textlen = revinfo.textlen
602 textlen = revinfo.textlen
603 defaultmax = textlen * 4
603 defaultmax = textlen * 4
604 maxdist = revlog._maxdeltachainspan
604 maxdist = revlog._maxdeltachainspan
605 if not maxdist:
605 if not maxdist:
606 maxdist = deltainfo.distance # ensure the conditional pass
606 maxdist = deltainfo.distance # ensure the conditional pass
607 maxdist = max(maxdist, defaultmax)
607 maxdist = max(maxdist, defaultmax)
608
608
609 # Bad delta from read span:
609 # Bad delta from read span:
610 #
610 #
611 # If the span of data read is larger than the maximum allowed.
611 # If the span of data read is larger than the maximum allowed.
612 #
612 #
613 # In the sparse-revlog case, we rely on the associated "sparse reading"
613 # In the sparse-revlog case, we rely on the associated "sparse reading"
614 # to avoid issue related to the span of data. In theory, it would be
614 # to avoid issue related to the span of data. In theory, it would be
615 # possible to build pathological revlog where delta pattern would lead
615 # possible to build pathological revlog where delta pattern would lead
616 # to too many reads. However, they do not happen in practice at all. So
616 # to too many reads. However, they do not happen in practice at all. So
617 # we skip the span check entirely.
617 # we skip the span check entirely.
618 if not revlog._sparserevlog and maxdist < deltainfo.distance:
618 if not revlog._sparserevlog and maxdist < deltainfo.distance:
619 return False
619 return False
620
620
621 # Bad delta from new delta size:
621 # Bad delta from new delta size:
622 #
622 #
623 # If the delta size is larger than the target text, storing the
623 # If the delta size is larger than the target text, storing the
624 # delta will be inefficient.
624 # delta will be inefficient.
625 if textlen < deltainfo.deltalen:
625 if textlen < deltainfo.deltalen:
626 return False
626 return False
627
627
628 # Bad delta from cumulated payload size:
628 # Bad delta from cumulated payload size:
629 #
629 #
630 # If the sum of deltas gets larger than K * target text length.
630 # If the sum of deltas gets larger than K * target text length.
631 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
631 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
632 return False
632 return False
633
633
634 # Bad delta from chain length:
634 # Bad delta from chain length:
635 #
635 #
636 # If the number of deltas in the chain gets too high.
636 # If the number of deltas in the chain gets too high.
637 if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
637 if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
638 return False
638 return False
639
639
640 # bad delta from intermediate snapshot size limit
640 # bad delta from intermediate snapshot size limit
641 #
641 #
642 # If an intermediate snapshot size is higher than the limit. The
642 # If an intermediate snapshot size is higher than the limit. The
643 # limit exists to prevent endless chains of intermediate deltas from
643 # limit exists to prevent endless chains of intermediate deltas from
644 # being created.
644 # being created.
645 if (
645 if (
646 deltainfo.snapshotdepth is not None
646 deltainfo.snapshotdepth is not None
647 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
647 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
648 ):
648 ):
649 return False
649 return False
650
650
651 # bad delta if new intermediate snapshot is larger than the previous
651 # bad delta if new intermediate snapshot is larger than the previous
652 # snapshot
652 # snapshot
653 if (
653 if (
654 deltainfo.snapshotdepth
654 deltainfo.snapshotdepth
655 and revlog.length(deltainfo.base) < deltainfo.deltalen
655 and revlog.length(deltainfo.base) < deltainfo.deltalen
656 ):
656 ):
657 return False
657 return False
658
658
659 return True
659 return True
660
660
661
661
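# Editor's note: a simplified, standalone sketch (assumed numbers, plain
# integers instead of revlog internals) of the size-based rejections performed
# above. It only models the delta-size, cumulative-payload and chain-length
# bounds, not the span or snapshot checks.
def _sketch_is_good(textlen, deltalen, chain_payload, chainlen,
                    maxchainlen=None, limit_delta2text=LIMIT_DELTA2TEXT):
    if textlen < deltalen:  # the delta is larger than the text it encodes
        return False
    if textlen * limit_delta2text < chain_payload:  # chain payload too large
        return False
    if maxchainlen and maxchainlen < chainlen:  # delta chain too long
        return False
    return True

assert _sketch_is_good(textlen=1000, deltalen=300, chain_payload=1500, chainlen=10)
assert not _sketch_is_good(textlen=1000, deltalen=1200, chain_payload=1500, chainlen=10)
assert not _sketch_is_good(textlen=1000, deltalen=300, chain_payload=2500, chainlen=10)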
662 # If a revision's full text is that much bigger than a base candidate full
662 # If a revision's full text is that much bigger than a base candidate full
663 # text's, it is very unlikely that it will produce a valid delta. We no longer
663 # text's, it is very unlikely that it will produce a valid delta. We no longer
664 # consider these candidates.
664 # consider these candidates.
665 LIMIT_BASE2TEXT = 500
665 LIMIT_BASE2TEXT = 500
666
666
667
667
668 def _candidategroups(
668 def _candidategroups(
669 revlog,
669 revlog,
670 textlen,
670 textlen,
671 p1,
671 p1,
672 p2,
672 p2,
673 cachedelta,
673 cachedelta,
674 excluded_bases=None,
674 excluded_bases=None,
675 target_rev=None,
675 target_rev=None,
676 snapshot_cache=None,
676 snapshot_cache=None,
677 ):
677 ):
678 """Provides group of revision to be tested as delta base
678 """Provides group of revision to be tested as delta base
679
679
680 This top level function focuses on emitting groups with unique and worthwhile
680 This top level function focuses on emitting groups with unique and worthwhile
681 content. See _rawgroups for details about the group order.
681 content. See _rawgroups for details about the group order.
682 """
682 """
683 # should we try to build a delta?
683 # should we try to build a delta?
684 if not (len(revlog) and revlog._storedeltachains):
684 if not (len(revlog) and revlog._storedeltachains):
685 yield None
685 yield None
686 return
686 return
687
687
688 if target_rev is None:
688 if target_rev is None:
689 target_rev = len(revlog)
689 target_rev = len(revlog)
690
690
691 if not revlog._generaldelta:
691 if not revlog._generaldelta:
692 # before general delta, there is only one possible delta base
692 # before general delta, there is only one possible delta base
693 yield (target_rev - 1,)
693 yield (target_rev - 1,)
694 yield None
694 yield None
695 return
695 return
696
696
697 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
697 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
698 # we should never end up asking such a question. Adding the assert as a
698 # we should never end up asking such a question. Adding the assert as a
699 # safe-guard to detect anything that would be fishy in this regard.
699 # safe-guard to detect anything that would be fishy in this regard.
700 assert (
700 assert (
701 cachedelta is None
701 cachedelta is None
702 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
702 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
703 or not revlog._generaldelta
703 or not revlog._generaldelta
704 )
704 )
705
705
706 deltalength = revlog.length
706 deltalength = revlog.length
707 deltaparent = revlog.deltaparent
707 deltaparent = revlog.deltaparent
708 sparse = revlog._sparserevlog
708 sparse = revlog._sparserevlog
709 good = None
709 good = None
710
710
711 deltas_limit = textlen * LIMIT_DELTA2TEXT
711 deltas_limit = textlen * LIMIT_DELTA2TEXT
712 group_chunk_size = revlog._candidate_group_chunk_size
712 group_chunk_size = revlog._candidate_group_chunk_size
713
713
714 tested = {nullrev}
714 tested = {nullrev}
715 candidates = _refinedgroups(
715 candidates = _refinedgroups(
716 revlog,
716 revlog,
717 p1,
717 p1,
718 p2,
718 p2,
719 cachedelta,
719 cachedelta,
720 snapshot_cache=snapshot_cache,
720 snapshot_cache=snapshot_cache,
721 )
721 )
722 while True:
722 while True:
723 temptative = candidates.send(good)
723 temptative = candidates.send(good)
724 if temptative is None:
724 if temptative is None:
725 break
725 break
726 group = []
726 group = []
727 for rev in temptative:
727 for rev in temptative:
728 # skip over empty delta (no need to include them in a chain)
728 # skip over empty delta (no need to include them in a chain)
729 while not (rev == nullrev or rev in tested or deltalength(rev)):
729 while not (rev == nullrev or rev in tested or deltalength(rev)):
730 tested.add(rev)
730 tested.add(rev)
731 rev = deltaparent(rev)
731 rev = deltaparent(rev)
732 # no need to try a delta against nullrev, this will be done as a
732 # no need to try a delta against nullrev, this will be done as a
733 # last resort.
733 # last resort.
734 if rev == nullrev:
734 if rev == nullrev:
735 continue
735 continue
736 # filter out revision we tested already
736 # filter out revision we tested already
737 if rev in tested:
737 if rev in tested:
738 continue
738 continue
739
739
740 # a higher authority deemed the base unworthy (e.g. censored)
740 # a higher authority deemed the base unworthy (e.g. censored)
741 if excluded_bases is not None and rev in excluded_bases:
741 if excluded_bases is not None and rev in excluded_bases:
742 tested.add(rev)
742 tested.add(rev)
743 continue
743 continue
744 # We are in some recomputation cases and that rev is too high in
744 # We are in some recomputation cases and that rev is too high in
745 # the revlog
745 # the revlog
746 if target_rev is not None and rev >= target_rev:
746 if target_rev is not None and rev >= target_rev:
747 tested.add(rev)
747 tested.add(rev)
748 continue
748 continue
749 # filter out delta base that will never produce good delta
749 # filter out delta base that will never produce good delta
750 if deltas_limit < revlog.length(rev):
750 if deltas_limit < revlog.length(rev):
751 tested.add(rev)
751 tested.add(rev)
752 continue
752 continue
753 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
753 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
754 tested.add(rev)
754 tested.add(rev)
755 continue
755 continue
756 # no delta for rawtext-changing revs (see "candelta" for why)
756 # no delta for rawtext-changing revs (see "candelta" for why)
757 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
757 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
758 tested.add(rev)
758 tested.add(rev)
759 continue
759 continue
760
760
761 # If we reach here, we are about to build and test a delta.
761 # If we reach here, we are about to build and test a delta.
762 # The delta building process will compute the chaininfo in all
762 # The delta building process will compute the chaininfo in all
763 # cases; since that computation is cached, it is fine to access it
763 # cases; since that computation is cached, it is fine to access it
764 # here too.
764 # here too.
765 chainlen, chainsize = revlog._chaininfo(rev)
765 chainlen, chainsize = revlog._chaininfo(rev)
766 # if chain will be too long, skip base
766 # if chain will be too long, skip base
767 if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
767 if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
768 tested.add(rev)
768 tested.add(rev)
769 continue
769 continue
770 # if the chain already has too much data, skip base
770 # if the chain already has too much data, skip base
771 if deltas_limit < chainsize:
771 if deltas_limit < chainsize:
772 tested.add(rev)
772 tested.add(rev)
773 continue
773 continue
774 if sparse and revlog.upperboundcomp is not None:
774 if sparse and revlog.upperboundcomp is not None:
775 maxcomp = revlog.upperboundcomp
775 maxcomp = revlog.upperboundcomp
776 basenotsnap = (p1, p2, nullrev)
776 basenotsnap = (p1, p2, nullrev)
777 if rev not in basenotsnap and revlog.issnapshot(rev):
777 if rev not in basenotsnap and revlog.issnapshot(rev):
778 snapshotdepth = revlog.snapshotdepth(rev)
778 snapshotdepth = revlog.snapshotdepth(rev)
779 # If text is significantly larger than the base, we can
779 # If text is significantly larger than the base, we can
780 # expect the resulting delta to be proportional to the size
780 # expect the resulting delta to be proportional to the size
781 # difference
781 # difference
782 revsize = revlog.rawsize(rev)
782 revsize = revlog.rawsize(rev)
783 rawsizedistance = max(textlen - revsize, 0)
783 rawsizedistance = max(textlen - revsize, 0)
784 # use an estimate of the compression upper bound.
784 # use an estimate of the compression upper bound.
785 lowestrealisticdeltalen = rawsizedistance // maxcomp
785 lowestrealisticdeltalen = rawsizedistance // maxcomp
786
786
787 # check the absolute constraint on the delta size
787 # check the absolute constraint on the delta size
788 snapshotlimit = textlen >> snapshotdepth
788 snapshotlimit = textlen >> snapshotdepth
789 if snapshotlimit < lowestrealisticdeltalen:
789 if snapshotlimit < lowestrealisticdeltalen:
790 # delta lower bound is larger than accepted upper bound
790 # delta lower bound is larger than accepted upper bound
791 tested.add(rev)
791 tested.add(rev)
792 continue
792 continue
793
793
794 # check the relative constraint on the delta size
794 # check the relative constraint on the delta size
795 revlength = revlog.length(rev)
795 revlength = revlog.length(rev)
796 if revlength < lowestrealisticdeltalen:
796 if revlength < lowestrealisticdeltalen:
797 # delta probable lower bound is larger than target base
797 # delta probable lower bound is larger than target base
798 tested.add(rev)
798 tested.add(rev)
799 continue
799 continue
800
800
801 group.append(rev)
801 group.append(rev)
802 if group:
802 if group:
803 # When the size of the candidate group is big, it can result in a
803 # When the size of the candidate group is big, it can result in a
804 # quite significant performance impact. To reduce this, we can send
804 # quite significant performance impact. To reduce this, we can send
805 # them in smaller batches until the new batch does not provide any
805 # them in smaller batches until the new batch does not provide any
806 # improvements.
806 # improvements.
807 #
807 #
808 # This might reduce the overall efficiency of the compression in
808 # This might reduce the overall efficiency of the compression in
809 # some corner cases, but that should also prevent very pathological
809 # some corner cases, but that should also prevent very pathological
810 # cases from being an issue. (eg. 20 000 candidates).
810 # cases from being an issue. (eg. 20 000 candidates).
811 #
811 #
812 # XXX note that the ordering of the group becomes important as it
812 # XXX note that the ordering of the group becomes important as it
813 # now impacts the final result. The current order is unprocessed
813 # now impacts the final result. The current order is unprocessed
814 # and can be improved.
814 # and can be improved.
815 if group_chunk_size == 0:
815 if group_chunk_size == 0:
816 tested.update(group)
816 tested.update(group)
817 good = yield tuple(group)
817 good = yield tuple(group)
818 else:
818 else:
819 prev_good = good
819 prev_good = good
820 for start in range(0, len(group), group_chunk_size):
820 for start in range(0, len(group), group_chunk_size):
821 sub_group = group[start : start + group_chunk_size]
821 sub_group = group[start : start + group_chunk_size]
822 tested.update(sub_group)
822 tested.update(sub_group)
823 good = yield tuple(sub_group)
823 good = yield tuple(sub_group)
824 if prev_good == good:
824 if prev_good == good:
825 break
825 break
826
826
827 yield None
827 yield None
828
828
829
829
830 def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
830 def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
831 good = None
831 good = None
832 # First we try to reuse the delta contained in the bundle.
832 # First we try to reuse the delta contained in the bundle.
833 # (or from the source revlog)
833 # (or from the source revlog)
834 #
834 #
835 # This logic only applies to general delta repositories and can be disabled
835 # This logic only applies to general delta repositories and can be disabled
836 # through configuration. Disabling source delta reuse is useful when
836 # through configuration. Disabling source delta reuse is useful when
837 # we want to make sure we recompute "optimal" deltas.
837 # we want to make sure we recompute "optimal" deltas.
838 debug_info = None
838 debug_info = None
839 if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
839 if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
840 # Assume what we received from the server is a good choice
840 # Assume what we received from the server is a good choice
841 # build delta will reuse the cache
841 # build delta will reuse the cache
842 if debug_info is not None:
842 if debug_info is not None:
843 debug_info['cached-delta.tested'] += 1
843 debug_info['cached-delta.tested'] += 1
844 good = yield (cachedelta[0],)
844 good = yield (cachedelta[0],)
845 if good is not None:
845 if good is not None:
846 if debug_info is not None:
846 if debug_info is not None:
847 debug_info['cached-delta.accepted'] += 1
847 debug_info['cached-delta.accepted'] += 1
848 yield None
848 yield None
849 return
849 return
850 if snapshot_cache is None:
850 if snapshot_cache is None:
851 snapshot_cache = SnapshotCache()
851 snapshot_cache = SnapshotCache()
852 groups = _rawgroups(
852 groups = _rawgroups(
853 revlog,
853 revlog,
854 p1,
854 p1,
855 p2,
855 p2,
856 cachedelta,
856 cachedelta,
857 snapshot_cache,
857 snapshot_cache,
858 )
858 )
859 for candidates in groups:
859 for candidates in groups:
860 good = yield candidates
860 good = yield candidates
861 if good is not None:
861 if good is not None:
862 break
862 break
863
863
864 # If sparse revlog is enabled, we can try to refine the available deltas
864 # If sparse revlog is enabled, we can try to refine the available deltas
865 if not revlog._sparserevlog:
865 if not revlog._sparserevlog:
866 yield None
866 yield None
867 return
867 return
868
868
869 # if we have a refinable value, try to refine it
869 # if we have a refinable value, try to refine it
870 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
870 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
871 # refine snapshot down
871 # refine snapshot down
872 previous = None
872 previous = None
873 while previous != good:
873 while previous != good:
874 previous = good
874 previous = good
875 base = revlog.deltaparent(good)
875 base = revlog.deltaparent(good)
876 if base == nullrev:
876 if base == nullrev:
877 break
877 break
878 good = yield (base,)
878 good = yield (base,)
879 # refine snapshot up
879 # refine snapshot up
880 if not snapshot_cache.snapshots:
880 if not snapshot_cache.snapshots:
881 snapshot_cache.update(revlog, good + 1)
881 snapshot_cache.update(revlog, good + 1)
882 previous = None
882 previous = None
883 while good != previous:
883 while good != previous:
884 previous = good
884 previous = good
885 children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
885 children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
886 good = yield children
886 good = yield children
887
887
888 if debug_info is not None:
888 if debug_info is not None:
889 if good is None:
889 if good is None:
890 debug_info['no-solution'] += 1
890 debug_info['no-solution'] += 1
891
891
892 yield None
892 yield None
893
893
894
894
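# Editor's note: a toy sketch (unrelated to any real revlog) of the send()
# protocol used by _candidategroups and _refinedgroups above: the consumer
# pulls a group of candidate bases, evaluates it, and sends the chosen "good"
# revision (or None) back so the generator can decide what to propose next.
def _sketch_candidates():
    good = yield (10, 11)  # first group of candidate bases
    if good is None:
        good = yield (5,)  # nothing accepted, propose a fallback group
    yield None  # no more candidates

_sketch_gen = _sketch_candidates()
assert next(_sketch_gen) == (10, 11)  # prime the generator, get the first group
assert _sketch_gen.send(None) == (5,)  # reject the first group, get the fallback
assert _sketch_gen.send(5) is None  # accept rev 5; the generator is done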
895 def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
895 def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
896 """Provides group of revision to be tested as delta base
896 """Provides group of revision to be tested as delta base
897
897
898 This lower level function focuses on emitting deltas that are theoretically
898 This lower level function focuses on emitting deltas that are theoretically
899 interesting, without looking at any practical details.
899 interesting, without looking at any practical details.
900
900
901 The group order aims at providing fast or small candidates first.
901 The group order aims at providing fast or small candidates first.
902 """
902 """
903 # Why search for delta base if we cannot use a delta base ?
903 # Why search for delta base if we cannot use a delta base ?
904 assert revlog._generaldelta
904 assert revlog._generaldelta
905 # also see issue6056
905 # also see issue6056
906 sparse = revlog._sparserevlog
906 sparse = revlog._sparserevlog
907 curr = len(revlog)
907 curr = len(revlog)
908 prev = curr - 1
908 prev = curr - 1
909 deltachain = lambda rev: revlog._deltachain(rev)[0]
909 deltachain = lambda rev: revlog._deltachain(rev)[0]
910
910
911 # exclude already lazy tested base if any
911 # exclude already lazy tested base if any
912 parents = [p for p in (p1, p2) if p != nullrev]
912 parents = [p for p in (p1, p2) if p != nullrev]
913
913
914 if not revlog._deltabothparents and len(parents) == 2:
914 if not revlog._deltabothparents and len(parents) == 2:
915 parents.sort()
915 parents.sort()
916 # To minimize the chance of having to build a fulltext,
916 # To minimize the chance of having to build a fulltext,
917 # pick first whichever parent is closest to us (max rev)
917 # pick first whichever parent is closest to us (max rev)
918 yield (parents[1],)
918 yield (parents[1],)
919 # then the other one (min rev) if the first did not fit
919 # then the other one (min rev) if the first did not fit
920 yield (parents[0],)
920 yield (parents[0],)
921 elif len(parents) > 0:
921 elif len(parents) > 0:
922 # Test all parents (1 or 2), and keep the best candidate
922 # Test all parents (1 or 2), and keep the best candidate
923 yield parents
923 yield parents
924
924
925 if sparse and parents:
925 if sparse and parents:
926 if snapshot_cache is None:
926 if snapshot_cache is None:
927 # map: base-rev: [snapshot-revs]
927 # map: base-rev: [snapshot-revs]
928 snapshot_cache = SnapshotCache()
928 snapshot_cache = SnapshotCache()
929 # See if there is an existing snapshot in the parent chains to use as
929 # See if there is an existing snapshot in the parent chains to use as
930 # a base for a new intermediate snapshot
930 # a base for a new intermediate snapshot
931 #
931 #
932 # search for snapshot in parents delta chain
932 # search for snapshot in parents delta chain
933 # map: snapshot-level: snapshot-rev
933 # map: snapshot-level: snapshot-rev
934 parents_snaps = collections.defaultdict(set)
934 parents_snaps = collections.defaultdict(set)
935 candidate_chains = [deltachain(p) for p in parents]
935 candidate_chains = [deltachain(p) for p in parents]
936 for chain in candidate_chains:
936 for chain in candidate_chains:
937 for idx, s in enumerate(chain):
937 for idx, s in enumerate(chain):
938 if not revlog.issnapshot(s):
938 if not revlog.issnapshot(s):
939 break
939 break
940 parents_snaps[idx].add(s)
940 parents_snaps[idx].add(s)
941 snapfloor = min(parents_snaps[0]) + 1
941 snapfloor = min(parents_snaps[0]) + 1
942 snapshot_cache.update(revlog, snapfloor)
942 snapshot_cache.update(revlog, snapfloor)
943 # search for the highest "unrelated" revision
943 # search for the highest "unrelated" revision
944 #
944 #
945 # Adding snapshots used by "unrelated" revisions increases the odds that we
945 # Adding snapshots used by "unrelated" revisions increases the odds that we
946 # reuse an independent, yet better, snapshot chain.
946 # reuse an independent, yet better, snapshot chain.
947 #
947 #
948 # XXX instead of building a set of revisions, we could lazily enumerate
948 # XXX instead of building a set of revisions, we could lazily enumerate
949 # over the chains. That would be more efficient, however we stick to
949 # over the chains. That would be more efficient, however we stick to
950 # simple code for now.
950 # simple code for now.
951 all_revs = set()
951 all_revs = set()
952 for chain in candidate_chains:
952 for chain in candidate_chains:
953 all_revs.update(chain)
953 all_revs.update(chain)
954 other = None
954 other = None
955 for r in revlog.revs(prev, snapfloor):
955 for r in revlog.revs(prev, snapfloor):
956 if r not in all_revs:
956 if r not in all_revs:
957 other = r
957 other = r
958 break
958 break
959 if other is not None:
959 if other is not None:
960 # To avoid unfair competition, we won't use unrelated intermediate
960 # To avoid unfair competition, we won't use unrelated intermediate
961 # snapshots that are deeper than the ones from the parent delta
961 # snapshots that are deeper than the ones from the parent delta
962 # chain.
962 # chain.
963 max_depth = max(parents_snaps.keys())
963 max_depth = max(parents_snaps.keys())
964 chain = deltachain(other)
964 chain = deltachain(other)
965 for depth, s in enumerate(chain):
965 for depth, s in enumerate(chain):
966 if s < snapfloor:
966 if s < snapfloor:
967 continue
967 continue
968 if max_depth < depth:
968 if max_depth < depth:
969 break
969 break
970 if not revlog.issnapshot(s):
970 if not revlog.issnapshot(s):
971 break
971 break
972 parents_snaps[depth].add(s)
972 parents_snaps[depth].add(s)
973 # Test them as possible intermediate snapshot base
973 # Test them as possible intermediate snapshot base
974 # We test them from highest to lowest level. High level ones are more
974 # We test them from highest to lowest level. High level ones are more
975 # likely to result in small deltas
975 # likely to result in small deltas
976 floor = None
976 floor = None
977 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
977 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
978 siblings = set()
978 siblings = set()
979 for s in snaps:
979 for s in snaps:
980 siblings.update(snapshot_cache.snapshots[s])
980 siblings.update(snapshot_cache.snapshots[s])
981 # Before considering making a new intermediate snapshot, we check
981 # Before considering making a new intermediate snapshot, we check
982 # if an existing snapshot, children of base we consider, would be
982 # if an existing snapshot, children of base we consider, would be
983 # suitable.
983 # suitable.
984 #
984 #
985 # It gives a chance to reuse a delta chain "unrelated" to the
985 # It gives a chance to reuse a delta chain "unrelated" to the
986 # current revision instead of starting our own. Without such
986 # current revision instead of starting our own. Without such
987 # re-use, topological branches would keep reopening new chains,
987 # re-use, topological branches would keep reopening new chains,
988 # creating more and more snapshots as the repository grows.
988 # creating more and more snapshots as the repository grows.
989
989
990 if floor is not None:
990 if floor is not None:
991 # We only do this for siblings created after the one in our
991 # We only do this for siblings created after the one in our
992 # parent's delta chain. Those created before have less chance
992 # parent's delta chain. Those created before have less chance
993 # to be a valid base since our ancestors had to create a new
993 # to be a valid base since our ancestors had to create a new
994 # snapshot.
994 # snapshot.
995 siblings = [r for r in siblings if floor < r]
995 siblings = [r for r in siblings if floor < r]
996 yield tuple(sorted(siblings))
996 yield tuple(sorted(siblings))
997 # then test the base from our parent's delta chain.
997 # then test the base from our parent's delta chain.
998 yield tuple(sorted(snaps))
998 yield tuple(sorted(snaps))
999 floor = min(snaps)
999 floor = min(snaps)
1000 # No suitable base found in the parent chain; search whether any full
1000 # No suitable base found in the parent chain; search whether any full
1001 # snapshot emitted since the parent's base would be a suitable base for an
1001 # snapshot emitted since the parent's base would be a suitable base for an
1002 # intermediate snapshot.
1002 # intermediate snapshot.
1003 #
1003 #
1004 # It gives a chance to reuse a delta chain unrelated to the current
1004 # It gives a chance to reuse a delta chain unrelated to the current
1005 # revision instead of starting our own. Without such re-use,
1005 # revision instead of starting our own. Without such re-use,
1006 # topological branches would keep reopening new full chains, creating
1006 # topological branches would keep reopening new full chains, creating
1007 # more and more snapshots as the repository grows.
1007 # more and more snapshots as the repository grows.
1008 full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
1008 full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
1009 yield tuple(sorted(full))
1009 yield tuple(sorted(full))
1010
1010
1011 if not sparse:
1011 if not sparse:
1012 # other approaches failed, try against prev to hopefully save us a
1012 # other approaches failed, try against prev to hopefully save us a
1013 # fulltext.
1013 # fulltext.
1014 yield (prev,)
1014 yield (prev,)
1015
1015
1016
1016
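
The candidate generator above is driven with a small next/send protocol: the caller pulls a group of candidate bases, tries each of them, and sends back the best base found so far so the generator can refine the search around it (this is how `finddeltainfo` consumes `_candidategroups` further down). A minimal sketch of that loop, under the assumption that `groups` is such a generator and `build_delta_against` is a placeholder standing in for `_builddeltainfo`:

# Hypothetical consumer of the candidate-group generator (names assumed).
candidaterevs = next(groups)                 # first group of candidate delta bases
best = None
while candidaterevs is not None:
    for candidate in candidaterevs:
        info = build_delta_against(candidate)    # placeholder for _builddeltainfo()
        if info is not None and (best is None or info.deltalen < best.deltalen):
            best = info
    if best is not None:
        candidaterevs = groups.send(best.base)   # refine around the current winner
    else:
        candidaterevs = next(groups)             # fall back to the next group
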
1017 class SnapshotCache:
1017 class SnapshotCache:
1018 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1018 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1019
1019
1020 def __init__(self):
1020 def __init__(self):
1021 self.snapshots = collections.defaultdict(set)
1021 self.snapshots = collections.defaultdict(set)
1022 self._start_rev = None
1022 self._start_rev = None
1023 self._end_rev = None
1023 self._end_rev = None
1024
1024
1025 def update(self, revlog, start_rev=0):
1025 def update(self, revlog, start_rev=0):
1026 """find snapshots from start_rev to tip"""
1026 """find snapshots from start_rev to tip"""
1027 nb_revs = len(revlog)
1027 nb_revs = len(revlog)
1028 end_rev = nb_revs - 1
1028 end_rev = nb_revs - 1
1029 if start_rev > end_rev:
1029 if start_rev > end_rev:
1030 return # range is empty
1030 return # range is empty
1031
1031
1032 if self._start_rev is None:
1032 if self._start_rev is None:
1033 assert self._end_rev is None
1033 assert self._end_rev is None
1034 self._update(revlog, start_rev, end_rev)
1034 self._update(revlog, start_rev, end_rev)
1035 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1035 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1036 if start_rev < self._start_rev:
1036 if start_rev < self._start_rev:
1037 self._update(revlog, start_rev, self._start_rev - 1)
1037 self._update(revlog, start_rev, self._start_rev - 1)
1038 if self._end_rev < end_rev:
1038 if self._end_rev < end_rev:
1039 self._update(revlog, self._end_rev + 1, end_rev)
1039 self._update(revlog, self._end_rev + 1, end_rev)
1040
1040
1041 if self._start_rev is None:
1041 if self._start_rev is None:
1042 assert self._end_rev is None
1042 assert self._end_rev is None
1043 self._end_rev = end_rev
1043 self._end_rev = end_rev
1044 self._start_rev = start_rev
1044 self._start_rev = start_rev
1045 else:
1045 else:
1046 self._start_rev = min(self._start_rev, start_rev)
1046 self._start_rev = min(self._start_rev, start_rev)
1047 self._end_rev = max(self._end_rev, end_rev)
1047 self._end_rev = max(self._end_rev, end_rev)
1048 assert self._start_rev <= self._end_rev, (
1048 assert self._start_rev <= self._end_rev, (
1049 self._start_rev,
1049 self._start_rev,
1050 self._end_rev,
1050 self._end_rev,
1051 )
1051 )
1052
1052
1053 def _update(self, revlog, start_rev, end_rev):
1053 def _update(self, revlog, start_rev, end_rev):
1054 """internal method that actually do update content"""
1054 """internal method that actually do update content"""
1055 assert self._start_rev is None or (
1055 assert self._start_rev is None or (
1056 start_rev < self._start_rev or start_rev > self._end_rev
1056 start_rev < self._start_rev or start_rev > self._end_rev
1057 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1057 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1058 assert self._start_rev is None or (
1058 assert self._start_rev is None or (
1059 end_rev < self._start_rev or end_rev > self._end_rev
1059 end_rev < self._start_rev or end_rev > self._end_rev
1060 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1060 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1061 cache = self.snapshots
1061 cache = self.snapshots
1062 if hasattr(revlog.index, 'findsnapshots'):
1062 if hasattr(revlog.index, 'findsnapshots'):
1063 revlog.index.findsnapshots(cache, start_rev, end_rev)
1063 revlog.index.findsnapshots(cache, start_rev, end_rev)
1064 else:
1064 else:
1065 deltaparent = revlog.deltaparent
1065 deltaparent = revlog.deltaparent
1066 issnapshot = revlog.issnapshot
1066 issnapshot = revlog.issnapshot
1067 for rev in revlog.revs(start_rev, end_rev):
1067 for rev in revlog.revs(start_rev, end_rev):
1068 if issnapshot(rev):
1068 if issnapshot(rev):
1069 cache[deltaparent(rev)].add(rev)
1069 cache[deltaparent(rev)].add(rev)
1070
1070
1071
1071
1072 class deltacomputer:
1072 class deltacomputer:
1073 def __init__(
1073 def __init__(
1074 self,
1074 self,
1075 revlog,
1075 revlog,
1076 write_debug=None,
1076 write_debug=None,
1077 debug_search=False,
1077 debug_search=False,
1078 debug_info=None,
1078 debug_info=None,
1079 ):
1079 ):
1080 self.revlog = revlog
1080 self.revlog = revlog
1081 self._write_debug = write_debug
1081 self._write_debug = write_debug
1082 if write_debug is None:
1082 if write_debug is None:
1083 self._debug_search = False
1083 self._debug_search = False
1084 else:
1084 else:
1085 self._debug_search = debug_search
1085 self._debug_search = debug_search
1086 self._debug_info = debug_info
1086 self._debug_info = debug_info
1087 self._snapshot_cache = SnapshotCache()
1087 self._snapshot_cache = SnapshotCache()
1088
1088
1089 @property
1089 @property
1090 def _gather_debug(self):
1090 def _gather_debug(self):
1091 return self._write_debug is not None or self._debug_info is not None
1091 return self._write_debug is not None or self._debug_info is not None
1092
1092
1093 def buildtext(self, revinfo, fh):
1093 def buildtext(self, revinfo):
1094 """Builds a fulltext version of a revision
1094 """Builds a fulltext version of a revision
1095
1095
1096 revinfo: revisioninfo instance that contains all needed info
1096 revinfo: revisioninfo instance that contains all needed info
1097 fh: file handle to either the .i or the .d revlog file,
1098 depending on whether it is inlined or not
1099 """
1097 """
1100 btext = revinfo.btext
1098 btext = revinfo.btext
1101 if btext[0] is not None:
1099 if btext[0] is not None:
1102 return btext[0]
1100 return btext[0]
1103
1101
1104 revlog = self.revlog
1102 revlog = self.revlog
1105 cachedelta = revinfo.cachedelta
1103 cachedelta = revinfo.cachedelta
1106 baserev = cachedelta[0]
1104 baserev = cachedelta[0]
1107 delta = cachedelta[1]
1105 delta = cachedelta[1]
1108
1106
1109 fulltext = btext[0] = _textfromdelta(
1107 fulltext = btext[0] = _textfromdelta(
1110 fh,
1111 revlog,
1108 revlog,
1112 baserev,
1109 baserev,
1113 delta,
1110 delta,
1114 revinfo.p1,
1111 revinfo.p1,
1115 revinfo.p2,
1112 revinfo.p2,
1116 revinfo.flags,
1113 revinfo.flags,
1117 revinfo.node,
1114 revinfo.node,
1118 )
1115 )
1119 return fulltext
1116 return fulltext
1120
1117
1121 def _builddeltadiff(self, base, revinfo, fh):
1118 def _builddeltadiff(self, base, revinfo):
1122 revlog = self.revlog
1119 revlog = self.revlog
1123 t = self.buildtext(revinfo, fh)
1120 t = self.buildtext(revinfo)
1124 if revlog.iscensored(base):
1121 if revlog.iscensored(base):
1125 # deltas based on a censored revision must replace the
1122 # deltas based on a censored revision must replace the
1126 # full content in one patch, so delta works everywhere
1123 # full content in one patch, so delta works everywhere
1127 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1124 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1128 delta = header + t
1125 delta = header + t
1129 else:
1126 else:
1130 ptext = revlog.rawdata(base, _df=fh)
1127 ptext = revlog.rawdata(base)
1131 delta = mdiff.textdiff(ptext, t)
1128 delta = mdiff.textdiff(ptext, t)
1132
1129
1133 return delta
1130 return delta
1134
1131
1135 def _builddeltainfo(self, revinfo, base, fh, target_rev=None):
1132 def _builddeltainfo(self, revinfo, base, target_rev=None):
1136 # can we use the cached delta?
1133 # can we use the cached delta?
1137 revlog = self.revlog
1134 revlog = self.revlog
1138 chainbase = revlog.chainbase(base)
1135 chainbase = revlog.chainbase(base)
1139 if revlog._generaldelta:
1136 if revlog._generaldelta:
1140 deltabase = base
1137 deltabase = base
1141 else:
1138 else:
1142 if target_rev is not None and base != target_rev - 1:
1139 if target_rev is not None and base != target_rev - 1:
1143 msg = (
1140 msg = (
1144 b'general delta cannot use delta for something else '
1141 b'general delta cannot use delta for something else '
1145 b'than `prev`: %d<-%d'
1142 b'than `prev`: %d<-%d'
1146 )
1143 )
1147 msg %= (base, target_rev)
1144 msg %= (base, target_rev)
1148 raise error.ProgrammingError(msg)
1145 raise error.ProgrammingError(msg)
1149 deltabase = chainbase
1146 deltabase = chainbase
1150 snapshotdepth = None
1147 snapshotdepth = None
1151 if revlog._sparserevlog and deltabase == nullrev:
1148 if revlog._sparserevlog and deltabase == nullrev:
1152 snapshotdepth = 0
1149 snapshotdepth = 0
1153 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1150 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1154 # A delta chain should always be one full snapshot,
1151 # A delta chain should always be one full snapshot,
1155 # zero or more semi-snapshots, and zero or more deltas
1152 # zero or more semi-snapshots, and zero or more deltas
1156 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1153 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1157 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1154 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1158 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1155 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1159 delta = None
1156 delta = None
1160 if revinfo.cachedelta:
1157 if revinfo.cachedelta:
1161 cachebase = revinfo.cachedelta[0]
1158 cachebase = revinfo.cachedelta[0]
1162 # check if the diff still applies
1159 # check if the diff still applies
1163 currentbase = cachebase
1160 currentbase = cachebase
1164 while (
1161 while (
1165 currentbase != nullrev
1162 currentbase != nullrev
1166 and currentbase != base
1163 and currentbase != base
1167 and self.revlog.length(currentbase) == 0
1164 and self.revlog.length(currentbase) == 0
1168 ):
1165 ):
1169 currentbase = self.revlog.deltaparent(currentbase)
1166 currentbase = self.revlog.deltaparent(currentbase)
1170 if self.revlog._lazydelta and currentbase == base:
1167 if self.revlog._lazydelta and currentbase == base:
1171 delta = revinfo.cachedelta[1]
1168 delta = revinfo.cachedelta[1]
1172 if delta is None:
1169 if delta is None:
1173 delta = self._builddeltadiff(base, revinfo, fh)
1170 delta = self._builddeltadiff(base, revinfo)
1174 if self._debug_search:
1171 if self._debug_search:
1175 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1172 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1176 msg %= len(delta)
1173 msg %= len(delta)
1177 self._write_debug(msg)
1174 self._write_debug(msg)
1178 # snapshotdepth needs to be neither None nor a 0-level snapshot
1175 # snapshotdepth needs to be neither None nor a 0-level snapshot
1179 if revlog.upperboundcomp is not None and snapshotdepth:
1176 if revlog.upperboundcomp is not None and snapshotdepth:
1180 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1177 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1181 snapshotlimit = revinfo.textlen >> snapshotdepth
1178 snapshotlimit = revinfo.textlen >> snapshotdepth
1182 if self._debug_search:
1179 if self._debug_search:
1183 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1180 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1184 msg %= lowestrealisticdeltalen
1181 msg %= lowestrealisticdeltalen
1185 self._write_debug(msg)
1182 self._write_debug(msg)
1186 if snapshotlimit < lowestrealisticdeltalen:
1183 if snapshotlimit < lowestrealisticdeltalen:
1187 if self._debug_search:
1184 if self._debug_search:
1188 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1185 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1189 self._write_debug(msg)
1186 self._write_debug(msg)
1190 return None
1187 return None
1191 if revlog.length(base) < lowestrealisticdeltalen:
1188 if revlog.length(base) < lowestrealisticdeltalen:
1192 if self._debug_search:
1189 if self._debug_search:
1193 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1190 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1194 self._write_debug(msg)
1191 self._write_debug(msg)
1195 return None
1192 return None
1196 header, data = revlog.compress(delta)
1193 header, data = revlog.compress(delta)
1197 deltalen = len(header) + len(data)
1194 deltalen = len(header) + len(data)
1198 offset = revlog.end(len(revlog) - 1)
1195 offset = revlog.end(len(revlog) - 1)
1199 dist = deltalen + offset - revlog.start(chainbase)
1196 dist = deltalen + offset - revlog.start(chainbase)
1200 chainlen, compresseddeltalen = revlog._chaininfo(base)
1197 chainlen, compresseddeltalen = revlog._chaininfo(base)
1201 chainlen += 1
1198 chainlen += 1
1202 compresseddeltalen += deltalen
1199 compresseddeltalen += deltalen
1203
1200
1204 return _deltainfo(
1201 return _deltainfo(
1205 dist,
1202 dist,
1206 deltalen,
1203 deltalen,
1207 (header, data),
1204 (header, data),
1208 deltabase,
1205 deltabase,
1209 chainbase,
1206 chainbase,
1210 chainlen,
1207 chainlen,
1211 compresseddeltalen,
1208 compresseddeltalen,
1212 snapshotdepth,
1209 snapshotdepth,
1213 )
1210 )
1214
1211
1215 def _fullsnapshotinfo(self, fh, revinfo, curr):
1212 def _fullsnapshotinfo(self, revinfo, curr):
1216 rawtext = self.buildtext(revinfo, fh)
1213 rawtext = self.buildtext(revinfo)
1217 data = self.revlog.compress(rawtext)
1214 data = self.revlog.compress(rawtext)
1218 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1215 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1219 deltabase = chainbase = curr
1216 deltabase = chainbase = curr
1220 snapshotdepth = 0
1217 snapshotdepth = 0
1221 chainlen = 1
1218 chainlen = 1
1222
1219
1223 return _deltainfo(
1220 return _deltainfo(
1224 dist,
1221 dist,
1225 deltalen,
1222 deltalen,
1226 data,
1223 data,
1227 deltabase,
1224 deltabase,
1228 chainbase,
1225 chainbase,
1229 chainlen,
1226 chainlen,
1230 compresseddeltalen,
1227 compresseddeltalen,
1231 snapshotdepth,
1228 snapshotdepth,
1232 )
1229 )
1233
1230
1234 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1231 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1235 """Find an acceptable delta against a candidate revision
1232 """Find an acceptable delta against a candidate revision
1236
1233
1237 revinfo: information about the revision (instance of _revisioninfo)
1234 revinfo: information about the revision (instance of _revisioninfo)
1238 fh: file handle to either the .i or the .d revlog file,
1239 depending on whether it is inlined or not
1240
1235
1241 Returns the first acceptable candidate revision, as ordered by
1236 Returns the first acceptable candidate revision, as ordered by
1242 _candidategroups
1237 _candidategroups
1243
1238
1244 If no suitable deltabase is found, we return delta info for a full
1239 If no suitable deltabase is found, we return delta info for a full
1245 snapshot.
1240 snapshot.
1246
1241
1247 `excluded_bases` is an optional set of revisions that cannot be used as
1242 `excluded_bases` is an optional set of revisions that cannot be used as
1248 a delta base. Use this to recompute deltas suitable in a censor or strip
1243 a delta base. Use this to recompute deltas suitable in a censor or strip
1249 context.
1244 context.
1250 """
1245 """
1251 if target_rev is None:
1246 if target_rev is None:
1252 target_rev = len(self.revlog)
1247 target_rev = len(self.revlog)
1253
1248
1254 gather_debug = self._gather_debug
1249 gather_debug = self._gather_debug
1255 cachedelta = revinfo.cachedelta
1250 cachedelta = revinfo.cachedelta
1256 revlog = self.revlog
1251 revlog = self.revlog
1257 p1r = p2r = None
1252 p1r = p2r = None
1258
1253
1259 if excluded_bases is None:
1254 if excluded_bases is None:
1260 excluded_bases = set()
1255 excluded_bases = set()
1261
1256
1262 if gather_debug:
1257 if gather_debug:
1263 start = util.timer()
1258 start = util.timer()
1264 dbg = self._one_dbg_data()
1259 dbg = self._one_dbg_data()
1265 dbg['revision'] = target_rev
1260 dbg['revision'] = target_rev
1266 target_revlog = b"UNKNOWN"
1261 target_revlog = b"UNKNOWN"
1267 target_type = self.revlog.target[0]
1262 target_type = self.revlog.target[0]
1268 target_key = self.revlog.target[1]
1263 target_key = self.revlog.target[1]
1269 if target_type == KIND_CHANGELOG:
1264 if target_type == KIND_CHANGELOG:
1270 target_revlog = b'CHANGELOG:'
1265 target_revlog = b'CHANGELOG:'
1271 elif target_type == KIND_MANIFESTLOG:
1266 elif target_type == KIND_MANIFESTLOG:
1272 target_revlog = b'MANIFESTLOG:'
1267 target_revlog = b'MANIFESTLOG:'
1273 if target_key:
1268 if target_key:
1274 target_revlog += b'%s:' % target_key
1269 target_revlog += b'%s:' % target_key
1275 elif target_type == KIND_FILELOG:
1270 elif target_type == KIND_FILELOG:
1276 target_revlog = b'FILELOG:'
1271 target_revlog = b'FILELOG:'
1277 if target_key:
1272 if target_key:
1278 target_revlog += b'%s:' % target_key
1273 target_revlog += b'%s:' % target_key
1279 dbg['target-revlog'] = target_revlog
1274 dbg['target-revlog'] = target_revlog
1280 p1r = revlog.rev(revinfo.p1)
1275 p1r = revlog.rev(revinfo.p1)
1281 p2r = revlog.rev(revinfo.p2)
1276 p2r = revlog.rev(revinfo.p2)
1282 if p1r != nullrev:
1277 if p1r != nullrev:
1283 p1_chain_len = revlog._chaininfo(p1r)[0]
1278 p1_chain_len = revlog._chaininfo(p1r)[0]
1284 else:
1279 else:
1285 p1_chain_len = -1
1280 p1_chain_len = -1
1286 if p2r != nullrev:
1281 if p2r != nullrev:
1287 p2_chain_len = revlog._chaininfo(p2r)[0]
1282 p2_chain_len = revlog._chaininfo(p2r)[0]
1288 else:
1283 else:
1289 p2_chain_len = -1
1284 p2_chain_len = -1
1290 dbg['p1-chain-len'] = p1_chain_len
1285 dbg['p1-chain-len'] = p1_chain_len
1291 dbg['p2-chain-len'] = p2_chain_len
1286 dbg['p2-chain-len'] = p2_chain_len
1292
1287
1293 # 1) if the revision is empty, no amount of delta can beat it
1288 # 1) if the revision is empty, no amount of delta can beat it
1294 #
1289 #
1295 # 2) no delta for flag processor revision (see "candelta" for why)
1290 # 2) no delta for flag processor revision (see "candelta" for why)
1296 # not calling candelta since only one revision needs test, also to
1291 # not calling candelta since only one revision needs test, also to
1297 # avoid overhead fetching flags again.
1292 # avoid overhead fetching flags again.
1298 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1293 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1299 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1294 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1300 if gather_debug:
1295 if gather_debug:
1301 end = util.timer()
1296 end = util.timer()
1302 dbg['duration'] = end - start
1297 dbg['duration'] = end - start
1303 dbg[
1298 dbg[
1304 'delta-base'
1299 'delta-base'
1305 ] = deltainfo.base # pytype: disable=attribute-error
1300 ] = deltainfo.base # pytype: disable=attribute-error
1306 dbg['search_round_count'] = 0
1301 dbg['search_round_count'] = 0
1307 dbg['using-cached-base'] = False
1302 dbg['using-cached-base'] = False
1308 dbg['delta_try_count'] = 0
1303 dbg['delta_try_count'] = 0
1309 dbg['type'] = b"full"
1304 dbg['type'] = b"full"
1310 dbg['snapshot-depth'] = 0
1305 dbg['snapshot-depth'] = 0
1311 self._dbg_process_data(dbg)
1306 self._dbg_process_data(dbg)
1312 return deltainfo
1307 return deltainfo
1313
1308
1314 deltainfo = None
1309 deltainfo = None
1315
1310
1316 # If this source delta is to be forcibly reused, let us comply early.
1311 # If this source delta is to be forcibly reused, let us comply early.
1317 if (
1312 if (
1318 revlog._generaldelta
1313 revlog._generaldelta
1319 and revinfo.cachedelta is not None
1314 and revinfo.cachedelta is not None
1320 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1315 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1321 ):
1316 ):
1322 base = revinfo.cachedelta[0]
1317 base = revinfo.cachedelta[0]
1323 if base == nullrev:
1318 if base == nullrev:
1324 dbg_type = b"full"
1319 dbg_type = b"full"
1325 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1320 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1326 if gather_debug:
1321 if gather_debug:
1327 snapshotdepth = 0
1322 snapshotdepth = 0
1328 elif base not in excluded_bases:
1323 elif base not in excluded_bases:
1329 delta = revinfo.cachedelta[1]
1324 delta = revinfo.cachedelta[1]
1330 header, data = revlog.compress(delta)
1325 header, data = revlog.compress(delta)
1331 deltalen = len(header) + len(data)
1326 deltalen = len(header) + len(data)
1332 if gather_debug:
1327 if gather_debug:
1333 offset = revlog.end(len(revlog) - 1)
1328 offset = revlog.end(len(revlog) - 1)
1334 chainbase = revlog.chainbase(base)
1329 chainbase = revlog.chainbase(base)
1335 distance = deltalen + offset - revlog.start(chainbase)
1330 distance = deltalen + offset - revlog.start(chainbase)
1336 chainlen, compresseddeltalen = revlog._chaininfo(base)
1331 chainlen, compresseddeltalen = revlog._chaininfo(base)
1337 chainlen += 1
1332 chainlen += 1
1338 compresseddeltalen += deltalen
1333 compresseddeltalen += deltalen
1339 if base == p1r or base == p2r:
1334 if base == p1r or base == p2r:
1340 dbg_type = b"delta"
1335 dbg_type = b"delta"
1341 snapshotdepth = None
1336 snapshotdepth = None
1342 elif not revlog.issnapshot(base):
1337 elif not revlog.issnapshot(base):
1343 snapshotdepth = None
1338 snapshotdepth = None
1344 else:
1339 else:
1345 dbg_type = b"snapshot"
1340 dbg_type = b"snapshot"
1346 snapshotdepth = revlog.snapshotdepth(base) + 1
1341 snapshotdepth = revlog.snapshotdepth(base) + 1
1347 else:
1342 else:
1348 distance = None
1343 distance = None
1349 chainbase = None
1344 chainbase = None
1350 chainlen = None
1345 chainlen = None
1351 compresseddeltalen = None
1346 compresseddeltalen = None
1352 snapshotdepth = None
1347 snapshotdepth = None
1353 deltainfo = _deltainfo(
1348 deltainfo = _deltainfo(
1354 distance=distance,
1349 distance=distance,
1355 deltalen=deltalen,
1350 deltalen=deltalen,
1356 data=(header, data),
1351 data=(header, data),
1357 base=base,
1352 base=base,
1358 chainbase=chainbase,
1353 chainbase=chainbase,
1359 chainlen=chainlen,
1354 chainlen=chainlen,
1360 compresseddeltalen=compresseddeltalen,
1355 compresseddeltalen=compresseddeltalen,
1361 snapshotdepth=snapshotdepth,
1356 snapshotdepth=snapshotdepth,
1362 )
1357 )
1363
1358
1364 if deltainfo is not None:
1359 if deltainfo is not None:
1365 if gather_debug:
1360 if gather_debug:
1366 end = util.timer()
1361 end = util.timer()
1367 dbg['duration'] = end - start
1362 dbg['duration'] = end - start
1368 dbg[
1363 dbg[
1369 'delta-base'
1364 'delta-base'
1370 ] = deltainfo.base # pytype: disable=attribute-error
1365 ] = deltainfo.base # pytype: disable=attribute-error
1371 dbg['search_round_count'] = 0
1366 dbg['search_round_count'] = 0
1372 dbg['using-cached-base'] = True
1367 dbg['using-cached-base'] = True
1373 dbg['delta_try_count'] = 0
1368 dbg['delta_try_count'] = 0
1374 dbg['type'] = b"full"
1369 dbg['type'] = b"full"
1375 if snapshotdepth is None:
1370 if snapshotdepth is None:
1376 dbg['snapshot-depth'] = 0
1371 dbg['snapshot-depth'] = 0
1377 else:
1372 else:
1378 dbg['snapshot-depth'] = snapshotdepth
1373 dbg['snapshot-depth'] = snapshotdepth
1379 self._dbg_process_data(dbg)
1374 self._dbg_process_data(dbg)
1380 return deltainfo
1375 return deltainfo
1381
1376
1382 # count the number of different deltas we tried (for debug purposes)
1377 # count the number of different deltas we tried (for debug purposes)
1383 dbg_try_count = 0
1378 dbg_try_count = 0
1384 # count the number of "search rounds" we did (for debug purposes)
1379 # count the number of "search rounds" we did (for debug purposes)
1385 dbg_try_rounds = 0
1380 dbg_try_rounds = 0
1386 dbg_type = b'unknown'
1381 dbg_type = b'unknown'
1387
1382
1388 if p1r is None:
1383 if p1r is None:
1389 p1r = revlog.rev(revinfo.p1)
1384 p1r = revlog.rev(revinfo.p1)
1390 p2r = revlog.rev(revinfo.p2)
1385 p2r = revlog.rev(revinfo.p2)
1391
1386
1392 if self._debug_search:
1387 if self._debug_search:
1393 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1388 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1394 msg %= target_rev
1389 msg %= target_rev
1395 self._write_debug(msg)
1390 self._write_debug(msg)
1396
1391
1397 groups = _candidategroups(
1392 groups = _candidategroups(
1398 self.revlog,
1393 self.revlog,
1399 revinfo.textlen,
1394 revinfo.textlen,
1400 p1r,
1395 p1r,
1401 p2r,
1396 p2r,
1402 cachedelta,
1397 cachedelta,
1403 excluded_bases,
1398 excluded_bases,
1404 target_rev,
1399 target_rev,
1405 snapshot_cache=self._snapshot_cache,
1400 snapshot_cache=self._snapshot_cache,
1406 )
1401 )
1407 candidaterevs = next(groups)
1402 candidaterevs = next(groups)
1408 while candidaterevs is not None:
1403 while candidaterevs is not None:
1409 dbg_try_rounds += 1
1404 dbg_try_rounds += 1
1410 if self._debug_search:
1405 if self._debug_search:
1411 prev = None
1406 prev = None
1412 if deltainfo is not None:
1407 if deltainfo is not None:
1413 prev = deltainfo.base
1408 prev = deltainfo.base
1414
1409
1415 if (
1410 if (
1416 cachedelta is not None
1411 cachedelta is not None
1417 and len(candidaterevs) == 1
1412 and len(candidaterevs) == 1
1418 and cachedelta[0] in candidaterevs
1413 and cachedelta[0] in candidaterevs
1419 ):
1414 ):
1420 round_type = b"cached-delta"
1415 round_type = b"cached-delta"
1421 elif p1r in candidaterevs or p2r in candidaterevs:
1416 elif p1r in candidaterevs or p2r in candidaterevs:
1422 round_type = b"parents"
1417 round_type = b"parents"
1423 elif prev is not None and all(c < prev for c in candidaterevs):
1418 elif prev is not None and all(c < prev for c in candidaterevs):
1424 round_type = b"refine-down"
1419 round_type = b"refine-down"
1425 elif prev is not None and all(c > prev for c in candidaterevs):
1420 elif prev is not None and all(c > prev for c in candidaterevs):
1426 round_type = b"refine-up"
1421 round_type = b"refine-up"
1427 else:
1422 else:
1428 round_type = b"search-down"
1423 round_type = b"search-down"
1429 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1424 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1430 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1425 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1431 self._write_debug(msg)
1426 self._write_debug(msg)
1432 nominateddeltas = []
1427 nominateddeltas = []
1433 if deltainfo is not None:
1428 if deltainfo is not None:
1434 if self._debug_search:
1429 if self._debug_search:
1435 msg = (
1430 msg = (
1436 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1431 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1437 )
1432 )
1438 msg %= (deltainfo.base, deltainfo.deltalen)
1433 msg %= (deltainfo.base, deltainfo.deltalen)
1439 self._write_debug(msg)
1434 self._write_debug(msg)
1440 # if we already found a good delta,
1435 # if we already found a good delta,
1441 # challenge it against refined candidates
1436 # challenge it against refined candidates
1442 nominateddeltas.append(deltainfo)
1437 nominateddeltas.append(deltainfo)
1443 for candidaterev in candidaterevs:
1438 for candidaterev in candidaterevs:
1444 if self._debug_search:
1439 if self._debug_search:
1445 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1440 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1446 msg %= candidaterev
1441 msg %= candidaterev
1447 self._write_debug(msg)
1442 self._write_debug(msg)
1448 candidate_type = None
1443 candidate_type = None
1449 if candidaterev == p1r:
1444 if candidaterev == p1r:
1450 candidate_type = b"p1"
1445 candidate_type = b"p1"
1451 elif candidaterev == p2r:
1446 elif candidaterev == p2r:
1452 candidate_type = b"p2"
1447 candidate_type = b"p2"
1453 elif self.revlog.issnapshot(candidaterev):
1448 elif self.revlog.issnapshot(candidaterev):
1454 candidate_type = b"snapshot-%d"
1449 candidate_type = b"snapshot-%d"
1455 candidate_type %= self.revlog.snapshotdepth(
1450 candidate_type %= self.revlog.snapshotdepth(
1456 candidaterev
1451 candidaterev
1457 )
1452 )
1458
1453
1459 if candidate_type is not None:
1454 if candidate_type is not None:
1460 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1455 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1461 msg %= candidate_type
1456 msg %= candidate_type
1462 self._write_debug(msg)
1457 self._write_debug(msg)
1463 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1458 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1464 msg %= self.revlog.length(candidaterev)
1459 msg %= self.revlog.length(candidaterev)
1465 self._write_debug(msg)
1460 self._write_debug(msg)
1466 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1461 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1467 msg %= self.revlog.deltaparent(candidaterev)
1462 msg %= self.revlog.deltaparent(candidaterev)
1468 self._write_debug(msg)
1463 self._write_debug(msg)
1469
1464
1470 dbg_try_count += 1
1465 dbg_try_count += 1
1471
1466
1472 if self._debug_search:
1467 if self._debug_search:
1473 delta_start = util.timer()
1468 delta_start = util.timer()
1474 candidatedelta = self._builddeltainfo(
1469 candidatedelta = self._builddeltainfo(
1475 revinfo,
1470 revinfo,
1476 candidaterev,
1471 candidaterev,
1477 fh,
1478 target_rev=target_rev,
1472 target_rev=target_rev,
1479 )
1473 )
1480 if self._debug_search:
1474 if self._debug_search:
1481 delta_end = util.timer()
1475 delta_end = util.timer()
1482 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1476 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1483 msg %= delta_end - delta_start
1477 msg %= delta_end - delta_start
1484 self._write_debug(msg)
1478 self._write_debug(msg)
1485 if candidatedelta is not None:
1479 if candidatedelta is not None:
1486 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1480 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1487 if self._debug_search:
1481 if self._debug_search:
1488 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1482 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1489 msg %= candidatedelta.deltalen
1483 msg %= candidatedelta.deltalen
1490 self._write_debug(msg)
1484 self._write_debug(msg)
1491 nominateddeltas.append(candidatedelta)
1485 nominateddeltas.append(candidatedelta)
1492 elif self._debug_search:
1486 elif self._debug_search:
1493 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1487 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1494 msg %= candidatedelta.deltalen
1488 msg %= candidatedelta.deltalen
1495 self._write_debug(msg)
1489 self._write_debug(msg)
1496 elif self._debug_search:
1490 elif self._debug_search:
1497 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1491 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1498 self._write_debug(msg)
1492 self._write_debug(msg)
1499 if nominateddeltas:
1493 if nominateddeltas:
1500 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1494 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1501 if deltainfo is not None:
1495 if deltainfo is not None:
1502 candidaterevs = groups.send(deltainfo.base)
1496 candidaterevs = groups.send(deltainfo.base)
1503 else:
1497 else:
1504 candidaterevs = next(groups)
1498 candidaterevs = next(groups)
1505
1499
1506 if deltainfo is None:
1500 if deltainfo is None:
1507 dbg_type = b"full"
1501 dbg_type = b"full"
1508 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1502 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1509 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1503 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1510 dbg_type = b"snapshot"
1504 dbg_type = b"snapshot"
1511 else:
1505 else:
1512 dbg_type = b"delta"
1506 dbg_type = b"delta"
1513
1507
1514 if gather_debug:
1508 if gather_debug:
1515 end = util.timer()
1509 end = util.timer()
1516 if dbg_type == b'full':
1510 if dbg_type == b'full':
1517 used_cached = (
1511 used_cached = (
1518 cachedelta is not None
1512 cachedelta is not None
1519 and dbg_try_rounds == 0
1513 and dbg_try_rounds == 0
1520 and dbg_try_count == 0
1514 and dbg_try_count == 0
1521 and cachedelta[0] == nullrev
1515 and cachedelta[0] == nullrev
1522 )
1516 )
1523 else:
1517 else:
1524 used_cached = (
1518 used_cached = (
1525 cachedelta is not None
1519 cachedelta is not None
1526 and dbg_try_rounds == 1
1520 and dbg_try_rounds == 1
1527 and dbg_try_count == 1
1521 and dbg_try_count == 1
1528 and deltainfo.base == cachedelta[0]
1522 and deltainfo.base == cachedelta[0]
1529 )
1523 )
1530 dbg['duration'] = end - start
1524 dbg['duration'] = end - start
1531 dbg[
1525 dbg[
1532 'delta-base'
1526 'delta-base'
1533 ] = deltainfo.base # pytype: disable=attribute-error
1527 ] = deltainfo.base # pytype: disable=attribute-error
1534 dbg['search_round_count'] = dbg_try_rounds
1528 dbg['search_round_count'] = dbg_try_rounds
1535 dbg['using-cached-base'] = used_cached
1529 dbg['using-cached-base'] = used_cached
1536 dbg['delta_try_count'] = dbg_try_count
1530 dbg['delta_try_count'] = dbg_try_count
1537 dbg['type'] = dbg_type
1531 dbg['type'] = dbg_type
1538 if (
1532 if (
1539 deltainfo.snapshotdepth # pytype: disable=attribute-error
1533 deltainfo.snapshotdepth # pytype: disable=attribute-error
1540 is not None
1534 is not None
1541 ):
1535 ):
1542 dbg[
1536 dbg[
1543 'snapshot-depth'
1537 'snapshot-depth'
1544 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1538 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1545 else:
1539 else:
1546 dbg['snapshot-depth'] = 0
1540 dbg['snapshot-depth'] = 0
1547 self._dbg_process_data(dbg)
1541 self._dbg_process_data(dbg)
1548 return deltainfo
1542 return deltainfo
1549
1543
1550 def _one_dbg_data(self):
1544 def _one_dbg_data(self):
1551 return {
1545 return {
1552 'duration': None,
1546 'duration': None,
1553 'revision': None,
1547 'revision': None,
1554 'delta-base': None,
1548 'delta-base': None,
1555 'search_round_count': None,
1549 'search_round_count': None,
1556 'using-cached-base': None,
1550 'using-cached-base': None,
1557 'delta_try_count': None,
1551 'delta_try_count': None,
1558 'type': None,
1552 'type': None,
1559 'p1-chain-len': None,
1553 'p1-chain-len': None,
1560 'p2-chain-len': None,
1554 'p2-chain-len': None,
1561 'snapshot-depth': None,
1555 'snapshot-depth': None,
1562 'target-revlog': None,
1556 'target-revlog': None,
1563 }
1557 }
1564
1558
1565 def _dbg_process_data(self, dbg):
1559 def _dbg_process_data(self, dbg):
1566 if self._debug_info is not None:
1560 if self._debug_info is not None:
1567 self._debug_info.append(dbg)
1561 self._debug_info.append(dbg)
1568
1562
1569 if self._write_debug is not None:
1563 if self._write_debug is not None:
1570 msg = (
1564 msg = (
1571 b"DBG-DELTAS:"
1565 b"DBG-DELTAS:"
1572 b" %-12s"
1566 b" %-12s"
1573 b" rev=%d:"
1567 b" rev=%d:"
1574 b" delta-base=%d"
1568 b" delta-base=%d"
1575 b" is-cached=%d"
1569 b" is-cached=%d"
1576 b" - search-rounds=%d"
1570 b" - search-rounds=%d"
1577 b" try-count=%d"
1571 b" try-count=%d"
1578 b" - delta-type=%-6s"
1572 b" - delta-type=%-6s"
1579 b" snap-depth=%d"
1573 b" snap-depth=%d"
1580 b" - p1-chain-length=%d"
1574 b" - p1-chain-length=%d"
1581 b" p2-chain-length=%d"
1575 b" p2-chain-length=%d"
1582 b" - duration=%f"
1576 b" - duration=%f"
1583 b"\n"
1577 b"\n"
1584 )
1578 )
1585 msg %= (
1579 msg %= (
1586 dbg["target-revlog"],
1580 dbg["target-revlog"],
1587 dbg["revision"],
1581 dbg["revision"],
1588 dbg["delta-base"],
1582 dbg["delta-base"],
1589 dbg["using-cached-base"],
1583 dbg["using-cached-base"],
1590 dbg["search_round_count"],
1584 dbg["search_round_count"],
1591 dbg["delta_try_count"],
1585 dbg["delta_try_count"],
1592 dbg["type"],
1586 dbg["type"],
1593 dbg["snapshot-depth"],
1587 dbg["snapshot-depth"],
1594 dbg["p1-chain-len"],
1588 dbg["p1-chain-len"],
1595 dbg["p2-chain-len"],
1589 dbg["p2-chain-len"],
1596 dbg["duration"],
1590 dbg["duration"],
1597 )
1591 )
1598 self._write_debug(msg)
1592 self._write_debug(msg)
1599
1593
1600
1594
1601 def delta_compression(default_compression_header, deltainfo):
1595 def delta_compression(default_compression_header, deltainfo):
1602 """return (COMPRESSION_MODE, deltainfo)
1596 """return (COMPRESSION_MODE, deltainfo)
1603
1597
1604 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1598 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1605 compression.
1599 compression.
1606 """
1600 """
1607 h, d = deltainfo.data
1601 h, d = deltainfo.data
1608 compression_mode = COMP_MODE_INLINE
1602 compression_mode = COMP_MODE_INLINE
1609 if not h and not d:
1603 if not h and not d:
1610 # no data to store at all... declare them uncompressed
1604 # no data to store at all... declare them uncompressed
1611 compression_mode = COMP_MODE_PLAIN
1605 compression_mode = COMP_MODE_PLAIN
1612 elif not h:
1606 elif not h:
1613 t = d[0:1]
1607 t = d[0:1]
1614 if t == b'\0':
1608 if t == b'\0':
1615 compression_mode = COMP_MODE_PLAIN
1609 compression_mode = COMP_MODE_PLAIN
1616 elif t == default_compression_header:
1610 elif t == default_compression_header:
1617 compression_mode = COMP_MODE_DEFAULT
1611 compression_mode = COMP_MODE_DEFAULT
1618 elif h == b'u':
1612 elif h == b'u':
1619 # we have a more efficient way to declare uncompressed
1613 # we have a more efficient way to declare uncompressed
1620 h = b''
1614 h = b''
1621 compression_mode = COMP_MODE_PLAIN
1615 compression_mode = COMP_MODE_PLAIN
1622 deltainfo = drop_u_compression(deltainfo)
1616 deltainfo = drop_u_compression(deltainfo)
1623 return compression_mode, deltainfo
1617 return compression_mode, deltainfo
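
The hunk above removes the explicit file handle argument (`fh`) from `buildtext`, `_builddeltadiff`, `_builddeltainfo`, `_fullsnapshotinfo` and `finddeltainfo`; the revlog now provides its own read handle. A minimal sketch of the resulting calling convention, mirroring `_precompute_rewritten_delta` in the next file (`revlog` and `info` stand for an opened revlog and a `revisioninfo` built by the caller, and are assumptions of this sketch):

# Sketch of driving the delta computer after the fh removal (names assumed).
dc = deltacomputer(revlog)
with revlog.reading():                      # the revlog manages its own file handles
    deltainfo = dc.finddeltainfo(info, target_rev=len(revlog))
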
@@ -1,881 +1,875 b''
1 # censor code related to censoring revision
1 # censor code related to censoring revision
2 # coding: utf8
2 # coding: utf8
3 #
3 #
4 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
4 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
5 # Copyright 2015 Google, Inc <martinvonz@google.com>
5 # Copyright 2015 Google, Inc <martinvonz@google.com>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9
9
10 import binascii
10 import binascii
11 import contextlib
11 import contextlib
12 import os
12 import os
13 import struct
13 import struct
14
14
15 from ..node import (
15 from ..node import (
16 nullrev,
16 nullrev,
17 )
17 )
18 from .constants import (
18 from .constants import (
19 COMP_MODE_PLAIN,
19 COMP_MODE_PLAIN,
20 ENTRY_DATA_COMPRESSED_LENGTH,
20 ENTRY_DATA_COMPRESSED_LENGTH,
21 ENTRY_DATA_COMPRESSION_MODE,
21 ENTRY_DATA_COMPRESSION_MODE,
22 ENTRY_DATA_OFFSET,
22 ENTRY_DATA_OFFSET,
23 ENTRY_DATA_UNCOMPRESSED_LENGTH,
23 ENTRY_DATA_UNCOMPRESSED_LENGTH,
24 ENTRY_DELTA_BASE,
24 ENTRY_DELTA_BASE,
25 ENTRY_LINK_REV,
25 ENTRY_LINK_REV,
26 ENTRY_NODE_ID,
26 ENTRY_NODE_ID,
27 ENTRY_PARENT_1,
27 ENTRY_PARENT_1,
28 ENTRY_PARENT_2,
28 ENTRY_PARENT_2,
29 ENTRY_SIDEDATA_COMPRESSED_LENGTH,
29 ENTRY_SIDEDATA_COMPRESSED_LENGTH,
30 ENTRY_SIDEDATA_COMPRESSION_MODE,
30 ENTRY_SIDEDATA_COMPRESSION_MODE,
31 ENTRY_SIDEDATA_OFFSET,
31 ENTRY_SIDEDATA_OFFSET,
32 REVIDX_ISCENSORED,
32 REVIDX_ISCENSORED,
33 REVLOGV0,
33 REVLOGV0,
34 REVLOGV1,
34 REVLOGV1,
35 )
35 )
36 from ..i18n import _
36 from ..i18n import _
37
37
38 from .. import (
38 from .. import (
39 error,
39 error,
40 mdiff,
40 mdiff,
41 pycompat,
41 pycompat,
42 revlogutils,
42 revlogutils,
43 util,
43 util,
44 )
44 )
45 from ..utils import (
45 from ..utils import (
46 storageutil,
46 storageutil,
47 )
47 )
48 from . import (
48 from . import (
49 constants,
49 constants,
50 deltas,
50 deltas,
51 )
51 )
52
52
53
53
54 def v1_censor(rl, tr, censornode, tombstone=b''):
54 def v1_censor(rl, tr, censornode, tombstone=b''):
55 """censors a revision in a "version 1" revlog"""
55 """censors a revision in a "version 1" revlog"""
56 assert rl._format_version == constants.REVLOGV1, rl._format_version
56 assert rl._format_version == constants.REVLOGV1, rl._format_version
57
57
58 # avoid cycle
58 # avoid cycle
59 from .. import revlog
59 from .. import revlog
60
60
61 censorrev = rl.rev(censornode)
61 censorrev = rl.rev(censornode)
62 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
62 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
63
63
64 # Rewriting the revlog in place is hard. Our strategy for censoring is
64 # Rewriting the revlog in place is hard. Our strategy for censoring is
65 # to create a new revlog, copy all revisions to it, then replace the
65 # to create a new revlog, copy all revisions to it, then replace the
66 # revlogs on transaction close.
66 # revlogs on transaction close.
67 #
67 #
68 # This is a bit dangerous. We could easily have a mismatch of state.
68 # This is a bit dangerous. We could easily have a mismatch of state.
69 newrl = revlog.revlog(
69 newrl = revlog.revlog(
70 rl.opener,
70 rl.opener,
71 target=rl.target,
71 target=rl.target,
72 radix=rl.radix,
72 radix=rl.radix,
73 postfix=b'tmpcensored',
73 postfix=b'tmpcensored',
74 censorable=True,
74 censorable=True,
75 )
75 )
76 newrl._format_version = rl._format_version
76 newrl._format_version = rl._format_version
77 newrl._format_flags = rl._format_flags
77 newrl._format_flags = rl._format_flags
78 newrl._generaldelta = rl._generaldelta
78 newrl._generaldelta = rl._generaldelta
79 newrl._parse_index = rl._parse_index
79 newrl._parse_index = rl._parse_index
80
80
81 for rev in rl.revs():
81 for rev in rl.revs():
82 node = rl.node(rev)
82 node = rl.node(rev)
83 p1, p2 = rl.parents(node)
83 p1, p2 = rl.parents(node)
84
84
85 if rev == censorrev:
85 if rev == censorrev:
86 newrl.addrawrevision(
86 newrl.addrawrevision(
87 tombstone,
87 tombstone,
88 tr,
88 tr,
89 rl.linkrev(censorrev),
89 rl.linkrev(censorrev),
90 p1,
90 p1,
91 p2,
91 p2,
92 censornode,
92 censornode,
93 constants.REVIDX_ISCENSORED,
93 constants.REVIDX_ISCENSORED,
94 )
94 )
95
95
96 if newrl.deltaparent(rev) != nullrev:
96 if newrl.deltaparent(rev) != nullrev:
97 m = _(b'censored revision stored as delta; cannot censor')
97 m = _(b'censored revision stored as delta; cannot censor')
98 h = _(
98 h = _(
99 b'censoring of revlogs is not fully implemented;'
99 b'censoring of revlogs is not fully implemented;'
100 b' please report this bug'
100 b' please report this bug'
101 )
101 )
102 raise error.Abort(m, hint=h)
102 raise error.Abort(m, hint=h)
103 continue
103 continue
104
104
105 if rl.iscensored(rev):
105 if rl.iscensored(rev):
106 if rl.deltaparent(rev) != nullrev:
106 if rl.deltaparent(rev) != nullrev:
107 m = _(
107 m = _(
108 b'cannot censor due to censored '
108 b'cannot censor due to censored '
109 b'revision having delta stored'
109 b'revision having delta stored'
110 )
110 )
111 raise error.Abort(m)
111 raise error.Abort(m)
112 rawtext = rl._chunk(rev)
112 rawtext = rl._chunk(rev)
113 else:
113 else:
114 rawtext = rl.rawdata(rev)
114 rawtext = rl.rawdata(rev)
115
115
116 newrl.addrawrevision(
116 newrl.addrawrevision(
117 rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
117 rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
118 )
118 )
119
119
120 tr.addbackup(rl._indexfile, location=b'store')
120 tr.addbackup(rl._indexfile, location=b'store')
121 if not rl._inline:
121 if not rl._inline:
122 tr.addbackup(rl._datafile, location=b'store')
122 tr.addbackup(rl._datafile, location=b'store')
123
123
124 rl.opener.rename(newrl._indexfile, rl._indexfile)
124 rl.opener.rename(newrl._indexfile, rl._indexfile)
125 if not rl._inline:
125 if not rl._inline:
126 rl.opener.rename(newrl._datafile, rl._datafile)
126 rl.opener.rename(newrl._datafile, rl._datafile)
127
127
128 rl.clearcaches()
128 rl.clearcaches()
129 rl._loadindex()
129 rl._loadindex()
130
130
131
131
132 def v2_censor(revlog, tr, censornode, tombstone=b''):
132 def v2_censor(revlog, tr, censornode, tombstone=b''):
133 """censors a revision in a "version 2" revlog"""
133 """censors a revision in a "version 2" revlog"""
134 assert revlog._format_version != REVLOGV0, revlog._format_version
134 assert revlog._format_version != REVLOGV0, revlog._format_version
135 assert revlog._format_version != REVLOGV1, revlog._format_version
135 assert revlog._format_version != REVLOGV1, revlog._format_version
136
136
137 censor_revs = {revlog.rev(censornode)}
137 censor_revs = {revlog.rev(censornode)}
138 _rewrite_v2(revlog, tr, censor_revs, tombstone)
138 _rewrite_v2(revlog, tr, censor_revs, tombstone)
139
139
140
140
141 def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''):
141 def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''):
142 """rewrite a revlog to censor some of its content
142 """rewrite a revlog to censor some of its content
143
143
144 General principle
144 General principle
145
145
146 We create new revlog files (index/data/sidedata) to copy the content of
146 We create new revlog files (index/data/sidedata) to copy the content of
147 the existing data without the censored data.
147 the existing data without the censored data.
148
148
149 We need to recompute a new delta for any revision that used the censored
149 We need to recompute a new delta for any revision that used the censored
150 revision as its delta base. As the cumulative size of the new deltas may be
150 revision as its delta base. As the cumulative size of the new deltas may be
151 large, we store them in a temporary file until they are stored in their
151 large, we store them in a temporary file until they are stored in their
152 final destination.
152 final destination.
153
153
154 All data before the censored data can be blindly copied. The rest needs
154 All data before the censored data can be blindly copied. The rest needs
155 to be copied as we go and the associated index entry needs adjustment.
155 to be copied as we go and the associated index entry needs adjustment.
156 """
156 """
157 assert revlog._format_version != REVLOGV0, revlog._format_version
157 assert revlog._format_version != REVLOGV0, revlog._format_version
158 assert revlog._format_version != REVLOGV1, revlog._format_version
158 assert revlog._format_version != REVLOGV1, revlog._format_version
159
159
160 old_index = revlog.index
160 old_index = revlog.index
161 docket = revlog._docket
161 docket = revlog._docket
162
162
163 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
163 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
164
164
165 first_excl_rev = min(censor_revs)
165 first_excl_rev = min(censor_revs)
166
166
167 first_excl_entry = revlog.index[first_excl_rev]
167 first_excl_entry = revlog.index[first_excl_rev]
168 index_cutoff = revlog.index.entry_size * first_excl_rev
168 index_cutoff = revlog.index.entry_size * first_excl_rev
169 data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
169 data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
170 sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)
170 sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)
171
171
172 with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
172 with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
173 # rev → (new_base, data_start, data_end, compression_mode)
173 # rev → (new_base, data_start, data_end, compression_mode)
174 rewritten_entries = _precompute_rewritten_delta(
174 rewritten_entries = _precompute_rewritten_delta(
175 revlog,
175 revlog,
176 old_index,
176 old_index,
177 censor_revs,
177 censor_revs,
178 tmp_storage,
178 tmp_storage,
179 )
179 )
180
180
181 all_files = _setup_new_files(
181 all_files = _setup_new_files(
182 revlog,
182 revlog,
183 index_cutoff,
183 index_cutoff,
184 data_cutoff,
184 data_cutoff,
185 sidedata_cutoff,
185 sidedata_cutoff,
186 )
186 )
187
187
188 # we don't need to open the old index file since its content already
188 # we don't need to open the old index file since its content already
189 # exists in a usable form in `old_index`.
189 # exists in a usable form in `old_index`.
190 with all_files() as open_files:
190 with all_files() as open_files:
191 (
191 (
192 old_data_file,
192 old_data_file,
193 old_sidedata_file,
193 old_sidedata_file,
194 new_index_file,
194 new_index_file,
195 new_data_file,
195 new_data_file,
196 new_sidedata_file,
196 new_sidedata_file,
197 ) = open_files
197 ) = open_files
198
198
199 # writing the censored revision
199 # writing the censored revision
200
200
201 # Writing all subsequent revisions
201 # Writing all subsequent revisions
202 for rev in range(first_excl_rev, len(old_index)):
202 for rev in range(first_excl_rev, len(old_index)):
203 if rev in censor_revs:
203 if rev in censor_revs:
204 _rewrite_censor(
204 _rewrite_censor(
205 revlog,
205 revlog,
206 old_index,
206 old_index,
207 open_files,
207 open_files,
208 rev,
208 rev,
209 tombstone,
209 tombstone,
210 )
210 )
211 else:
211 else:
212 _rewrite_simple(
212 _rewrite_simple(
213 revlog,
213 revlog,
214 old_index,
214 old_index,
215 open_files,
215 open_files,
216 rev,
216 rev,
217 rewritten_entries,
217 rewritten_entries,
218 tmp_storage,
218 tmp_storage,
219 )
219 )
220 docket.write(transaction=None, stripping=True)
220 docket.write(transaction=None, stripping=True)
221
221
222
222
223 def _precompute_rewritten_delta(
223 def _precompute_rewritten_delta(
224 revlog,
224 revlog,
225 old_index,
225 old_index,
226 excluded_revs,
226 excluded_revs,
227 tmp_storage,
227 tmp_storage,
228 ):
228 ):
229 """Compute new delta for revisions whose delta is based on revision that
229 """Compute new delta for revisions whose delta is based on revision that
230 will not survive as is.
230 will not survive as is.
231
231
232 Return a mapping: {rev → (new_base, data_start, data_end, compression_mode)}
232 Return a mapping: {rev → (new_base, data_start, data_end, compression_mode)}
233 """
233 """
234 dc = deltas.deltacomputer(revlog)
234 dc = deltas.deltacomputer(revlog)
235 rewritten_entries = {}
235 rewritten_entries = {}
236 first_excl_rev = min(excluded_revs)
236 first_excl_rev = min(excluded_revs)
237 with revlog.reading(), revlog._segmentfile._open_read() as dfh:
237 with revlog.reading():
238 for rev in range(first_excl_rev, len(old_index)):
238 for rev in range(first_excl_rev, len(old_index)):
239 if rev in excluded_revs:
239 if rev in excluded_revs:
240 # this revision will be preserved as is, so we don't need to
240 # this revision will be preserved as is, so we don't need to
241 # consider recomputing a delta.
241 # consider recomputing a delta.
242 continue
242 continue
243 entry = old_index[rev]
243 entry = old_index[rev]
244 if entry[ENTRY_DELTA_BASE] not in excluded_revs:
244 if entry[ENTRY_DELTA_BASE] not in excluded_revs:
245 continue
245 continue
246 # This is a revision that uses the censored revision as the base
246 # This is a revision that uses the censored revision as the base
247 # for its delta. We need a new delta for it.
247 # for its delta. We need a new delta for it.
248 if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
248 if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
249 # this revision is empty, we can delta against nullrev
249 # this revision is empty, we can delta against nullrev
250 rewritten_entries[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
250 rewritten_entries[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
251 else:
251 else:
252
252
253 text = revlog.rawdata(rev)
253 text = revlog.rawdata(rev)
254 info = revlogutils.revisioninfo(
254 info = revlogutils.revisioninfo(
255 node=entry[ENTRY_NODE_ID],
255 node=entry[ENTRY_NODE_ID],
256 p1=revlog.node(entry[ENTRY_PARENT_1]),
256 p1=revlog.node(entry[ENTRY_PARENT_1]),
257 p2=revlog.node(entry[ENTRY_PARENT_2]),
257 p2=revlog.node(entry[ENTRY_PARENT_2]),
258 btext=[text],
258 btext=[text],
259 textlen=len(text),
259 textlen=len(text),
260 cachedelta=None,
260 cachedelta=None,
261 flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
261 flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
262 )
262 )
263 d = dc.finddeltainfo(
263 d = dc.finddeltainfo(
264 info, dfh, excluded_bases=excluded_revs, target_rev=rev
264 info, excluded_bases=excluded_revs, target_rev=rev
265 )
265 )
266 default_comp = revlog._docket.default_compression_header
266 default_comp = revlog._docket.default_compression_header
267 comp_mode, d = deltas.delta_compression(default_comp, d)
267 comp_mode, d = deltas.delta_compression(default_comp, d)
268 # using `tell` is a bit lazy, but we are not here for speed
268 # using `tell` is a bit lazy, but we are not here for speed
269 start = tmp_storage.tell()
269 start = tmp_storage.tell()
270 tmp_storage.write(d.data[1])
270 tmp_storage.write(d.data[1])
271 end = tmp_storage.tell()
271 end = tmp_storage.tell()
272 rewritten_entries[rev] = (d.base, start, end, comp_mode)
272 rewritten_entries[rev] = (d.base, start, end, comp_mode)
273 return rewritten_entries
273 return rewritten_entries
274
274
275
275
276 def _setup_new_files(
276 def _setup_new_files(
277 revlog,
277 revlog,
278 index_cutoff,
278 index_cutoff,
279 data_cutoff,
279 data_cutoff,
280 sidedata_cutoff,
280 sidedata_cutoff,
281 ):
281 ):
282 """
282 """
283
283
284 return a context manager to open all the relevant files:
284 return a context manager to open all the relevant files:
285 - old_data_file,
285 - old_data_file,
286 - old_sidedata_file,
286 - old_sidedata_file,
287 - new_index_file,
287 - new_index_file,
288 - new_data_file,
288 - new_data_file,
289 - new_sidedata_file,
289 - new_sidedata_file,
290
290
291 The old_index_file is not here because it is accessed through the
291 The old_index_file is not here because it is accessed through the
292 `old_index` object of the calling function.
292 `old_index` object of the calling function.
293 """
293 """
294 docket = revlog._docket
294 docket = revlog._docket
295 old_index_filepath = revlog.opener.join(docket.index_filepath())
295 old_index_filepath = revlog.opener.join(docket.index_filepath())
296 old_data_filepath = revlog.opener.join(docket.data_filepath())
296 old_data_filepath = revlog.opener.join(docket.data_filepath())
297 old_sidedata_filepath = revlog.opener.join(docket.sidedata_filepath())
297 old_sidedata_filepath = revlog.opener.join(docket.sidedata_filepath())
298
298
299 new_index_filepath = revlog.opener.join(docket.new_index_file())
299 new_index_filepath = revlog.opener.join(docket.new_index_file())
300 new_data_filepath = revlog.opener.join(docket.new_data_file())
300 new_data_filepath = revlog.opener.join(docket.new_data_file())
301 new_sidedata_filepath = revlog.opener.join(docket.new_sidedata_file())
301 new_sidedata_filepath = revlog.opener.join(docket.new_sidedata_file())
302
302
303 util.copyfile(old_index_filepath, new_index_filepath, nb_bytes=index_cutoff)
303 util.copyfile(old_index_filepath, new_index_filepath, nb_bytes=index_cutoff)
304 util.copyfile(old_data_filepath, new_data_filepath, nb_bytes=data_cutoff)
304 util.copyfile(old_data_filepath, new_data_filepath, nb_bytes=data_cutoff)
305 util.copyfile(
305 util.copyfile(
306 old_sidedata_filepath,
306 old_sidedata_filepath,
307 new_sidedata_filepath,
307 new_sidedata_filepath,
308 nb_bytes=sidedata_cutoff,
308 nb_bytes=sidedata_cutoff,
309 )
309 )
310 revlog.opener.register_file(docket.index_filepath())
310 revlog.opener.register_file(docket.index_filepath())
311 revlog.opener.register_file(docket.data_filepath())
311 revlog.opener.register_file(docket.data_filepath())
312 revlog.opener.register_file(docket.sidedata_filepath())
312 revlog.opener.register_file(docket.sidedata_filepath())
313
313
314 docket.index_end = index_cutoff
314 docket.index_end = index_cutoff
315 docket.data_end = data_cutoff
315 docket.data_end = data_cutoff
316 docket.sidedata_end = sidedata_cutoff
316 docket.sidedata_end = sidedata_cutoff
317
317
318 # reload the revlog internal information
318 # reload the revlog internal information
319 revlog.clearcaches()
319 revlog.clearcaches()
320 revlog._loadindex(docket=docket)
320 revlog._loadindex(docket=docket)
321
321
322 @contextlib.contextmanager
322 @contextlib.contextmanager
323 def all_files_opener():
323 def all_files_opener():
324 # hide opening in a helper function to please check-code, black
324 # hide opening in a helper function to please check-code, black
325 # and various python versions at the same time
325 # and various python versions at the same time
326 with open(old_data_filepath, 'rb') as old_data_file:
326 with open(old_data_filepath, 'rb') as old_data_file:
327 with open(old_sidedata_filepath, 'rb') as old_sidedata_file:
327 with open(old_sidedata_filepath, 'rb') as old_sidedata_file:
328 with open(new_index_filepath, 'r+b') as new_index_file:
328 with open(new_index_filepath, 'r+b') as new_index_file:
329 with open(new_data_filepath, 'r+b') as new_data_file:
329 with open(new_data_filepath, 'r+b') as new_data_file:
330 with open(
330 with open(
331 new_sidedata_filepath, 'r+b'
331 new_sidedata_filepath, 'r+b'
332 ) as new_sidedata_file:
332 ) as new_sidedata_file:
333 new_index_file.seek(0, os.SEEK_END)
333 new_index_file.seek(0, os.SEEK_END)
334 assert new_index_file.tell() == index_cutoff
334 assert new_index_file.tell() == index_cutoff
335 new_data_file.seek(0, os.SEEK_END)
335 new_data_file.seek(0, os.SEEK_END)
336 assert new_data_file.tell() == data_cutoff
336 assert new_data_file.tell() == data_cutoff
337 new_sidedata_file.seek(0, os.SEEK_END)
337 new_sidedata_file.seek(0, os.SEEK_END)
338 assert new_sidedata_file.tell() == sidedata_cutoff
338 assert new_sidedata_file.tell() == sidedata_cutoff
339 yield (
339 yield (
340 old_data_file,
340 old_data_file,
341 old_sidedata_file,
341 old_sidedata_file,
342 new_index_file,
342 new_index_file,
343 new_data_file,
343 new_data_file,
344 new_sidedata_file,
344 new_sidedata_file,
345 )
345 )
346
346
347 return all_files_opener
347 return all_files_opener
348
348
349
349
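The deeply nested `with` blocks in `all_files_opener` above exist mostly to keep linters and older Python versions happy. A sketch of the same idea using `contextlib.ExitStack` (generic stdlib code, not a drop-in replacement for the vfs-based helper used here):

import contextlib

@contextlib.contextmanager
def open_all(paths_and_modes):
    # open every (path, mode) pair and close them all on exit, even on error
    with contextlib.ExitStack() as stack:
        yield [stack.enter_context(open(p, m)) for p, m in paths_and_modes]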
350 def _rewrite_simple(
350 def _rewrite_simple(
351 revlog,
351 revlog,
352 old_index,
352 old_index,
353 all_files,
353 all_files,
354 rev,
354 rev,
355 rewritten_entries,
355 rewritten_entries,
356 tmp_storage,
356 tmp_storage,
357 ):
357 ):
358 """append a normal revision to the index after the rewritten one(s)"""
358 """append a normal revision to the index after the rewritten one(s)"""
359 (
359 (
360 old_data_file,
360 old_data_file,
361 old_sidedata_file,
361 old_sidedata_file,
362 new_index_file,
362 new_index_file,
363 new_data_file,
363 new_data_file,
364 new_sidedata_file,
364 new_sidedata_file,
365 ) = all_files
365 ) = all_files
366 entry = old_index[rev]
366 entry = old_index[rev]
367 flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
367 flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
368 old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16
368 old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16
369
369
370 if rev not in rewritten_entries:
370 if rev not in rewritten_entries:
371 old_data_file.seek(old_data_offset)
371 old_data_file.seek(old_data_offset)
372 new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
372 new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
373 new_data = old_data_file.read(new_data_size)
373 new_data = old_data_file.read(new_data_size)
374 data_delta_base = entry[ENTRY_DELTA_BASE]
374 data_delta_base = entry[ENTRY_DELTA_BASE]
375 d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]
375 d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]
376 else:
376 else:
377 (
377 (
378 data_delta_base,
378 data_delta_base,
379 start,
379 start,
380 end,
380 end,
381 d_comp_mode,
381 d_comp_mode,
382 ) = rewritten_entries[rev]
382 ) = rewritten_entries[rev]
383 new_data_size = end - start
383 new_data_size = end - start
384 tmp_storage.seek(start)
384 tmp_storage.seek(start)
385 new_data = tmp_storage.read(new_data_size)
385 new_data = tmp_storage.read(new_data_size)
386
386
387 # It might be faster to group contiguous read/write operations;
387 # It might be faster to group contiguous read/write operations;
388 # however, this is censoring, an operation that is not focused
388 # however, this is censoring, an operation that is not focused
389 # on stellar performance. So I have not written this
389 # on stellar performance. So I have not written this
390 # optimisation yet.
390 # optimisation yet.
391 new_data_offset = new_data_file.tell()
391 new_data_offset = new_data_file.tell()
392 new_data_file.write(new_data)
392 new_data_file.write(new_data)
393
393
394 sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
394 sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
395 new_sidedata_offset = new_sidedata_file.tell()
395 new_sidedata_offset = new_sidedata_file.tell()
396 if 0 < sidedata_size:
396 if 0 < sidedata_size:
397 old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
397 old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
398 old_sidedata_file.seek(old_sidedata_offset)
398 old_sidedata_file.seek(old_sidedata_offset)
399 new_sidedata = old_sidedata_file.read(sidedata_size)
399 new_sidedata = old_sidedata_file.read(sidedata_size)
400 new_sidedata_file.write(new_sidedata)
400 new_sidedata_file.write(new_sidedata)
401
401
402 data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
402 data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
403 sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
403 sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
404 assert data_delta_base <= rev, (data_delta_base, rev)
404 assert data_delta_base <= rev, (data_delta_base, rev)
405
405
406 new_entry = revlogutils.entry(
406 new_entry = revlogutils.entry(
407 flags=flags,
407 flags=flags,
408 data_offset=new_data_offset,
408 data_offset=new_data_offset,
409 data_compressed_length=new_data_size,
409 data_compressed_length=new_data_size,
410 data_uncompressed_length=data_uncompressed_length,
410 data_uncompressed_length=data_uncompressed_length,
411 data_delta_base=data_delta_base,
411 data_delta_base=data_delta_base,
412 link_rev=entry[ENTRY_LINK_REV],
412 link_rev=entry[ENTRY_LINK_REV],
413 parent_rev_1=entry[ENTRY_PARENT_1],
413 parent_rev_1=entry[ENTRY_PARENT_1],
414 parent_rev_2=entry[ENTRY_PARENT_2],
414 parent_rev_2=entry[ENTRY_PARENT_2],
415 node_id=entry[ENTRY_NODE_ID],
415 node_id=entry[ENTRY_NODE_ID],
416 sidedata_offset=new_sidedata_offset,
416 sidedata_offset=new_sidedata_offset,
417 sidedata_compressed_length=sidedata_size,
417 sidedata_compressed_length=sidedata_size,
418 data_compression_mode=d_comp_mode,
418 data_compression_mode=d_comp_mode,
419 sidedata_compression_mode=sd_com_mode,
419 sidedata_compression_mode=sd_com_mode,
420 )
420 )
421 revlog.index.append(new_entry)
421 revlog.index.append(new_entry)
422 entry_bin = revlog.index.entry_binary(rev)
422 entry_bin = revlog.index.entry_binary(rev)
423 new_index_file.write(entry_bin)
423 new_index_file.write(entry_bin)
424
424
425 revlog._docket.index_end = new_index_file.tell()
425 revlog._docket.index_end = new_index_file.tell()
426 revlog._docket.data_end = new_data_file.tell()
426 revlog._docket.data_end = new_data_file.tell()
427 revlog._docket.sidedata_end = new_sidedata_file.tell()
427 revlog._docket.sidedata_end = new_sidedata_file.tell()
428
428
429
429
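A note on the `& 0xFFFF` / `>> 16` dance near the top of `_rewrite_simple`: the first field of a revlog index entry packs the data offset and the revision flags into a single integer. A small standalone sketch of that packing (function names are mine, not Mercurial's):

def pack_offset_flags(data_offset, flags):
    # low 16 bits carry the flags, the remaining bits carry the data offset
    return (data_offset << 16) | (flags & 0xFFFF)

def unpack_offset_flags(offset_flags):
    return offset_flags >> 16, offset_flags & 0xFFFF

assert unpack_offset_flags(pack_offset_flags(1024, 0x2)) == (1024, 0x2)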
430 def _rewrite_censor(
430 def _rewrite_censor(
431 revlog,
431 revlog,
432 old_index,
432 old_index,
433 all_files,
433 all_files,
434 rev,
434 rev,
435 tombstone,
435 tombstone,
436 ):
436 ):
437 """rewrite and append a censored revision"""
437 """rewrite and append a censored revision"""
438 (
438 (
439 old_data_file,
439 old_data_file,
440 old_sidedata_file,
440 old_sidedata_file,
441 new_index_file,
441 new_index_file,
442 new_data_file,
442 new_data_file,
443 new_sidedata_file,
443 new_sidedata_file,
444 ) = all_files
444 ) = all_files
445 entry = old_index[rev]
445 entry = old_index[rev]
446
446
447 # XXX consider trying the default compression too
447 # XXX consider trying the default compression too
448 new_data_size = len(tombstone)
448 new_data_size = len(tombstone)
449 new_data_offset = new_data_file.tell()
449 new_data_offset = new_data_file.tell()
450 new_data_file.write(tombstone)
450 new_data_file.write(tombstone)
451
451
452 # we are not adding any sidedata as they might leak info about the censored version
452 # we are not adding any sidedata as they might leak info about the censored version
453
453
454 link_rev = entry[ENTRY_LINK_REV]
454 link_rev = entry[ENTRY_LINK_REV]
455
455
456 p1 = entry[ENTRY_PARENT_1]
456 p1 = entry[ENTRY_PARENT_1]
457 p2 = entry[ENTRY_PARENT_2]
457 p2 = entry[ENTRY_PARENT_2]
458
458
459 new_entry = revlogutils.entry(
459 new_entry = revlogutils.entry(
460 flags=constants.REVIDX_ISCENSORED,
460 flags=constants.REVIDX_ISCENSORED,
461 data_offset=new_data_offset,
461 data_offset=new_data_offset,
462 data_compressed_length=new_data_size,
462 data_compressed_length=new_data_size,
463 data_uncompressed_length=new_data_size,
463 data_uncompressed_length=new_data_size,
464 data_delta_base=rev,
464 data_delta_base=rev,
465 link_rev=link_rev,
465 link_rev=link_rev,
466 parent_rev_1=p1,
466 parent_rev_1=p1,
467 parent_rev_2=p2,
467 parent_rev_2=p2,
468 node_id=entry[ENTRY_NODE_ID],
468 node_id=entry[ENTRY_NODE_ID],
469 sidedata_offset=0,
469 sidedata_offset=0,
470 sidedata_compressed_length=0,
470 sidedata_compressed_length=0,
471 data_compression_mode=COMP_MODE_PLAIN,
471 data_compression_mode=COMP_MODE_PLAIN,
472 sidedata_compression_mode=COMP_MODE_PLAIN,
472 sidedata_compression_mode=COMP_MODE_PLAIN,
473 )
473 )
474 revlog.index.append(new_entry)
474 revlog.index.append(new_entry)
475 entry_bin = revlog.index.entry_binary(rev)
475 entry_bin = revlog.index.entry_binary(rev)
476 new_index_file.write(entry_bin)
476 new_index_file.write(entry_bin)
477 revlog._docket.index_end = new_index_file.tell()
477 revlog._docket.index_end = new_index_file.tell()
478 revlog._docket.data_end = new_data_file.tell()
478 revlog._docket.data_end = new_data_file.tell()
479
479
480
480
481 def _get_filename_from_filelog_index(path):
481 def _get_filename_from_filelog_index(path):
482 # Drop the extension and the `data/` prefix
482 # Drop the extension and the `data/` prefix
483 path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
483 path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
484 if len(path_part) < 2:
484 if len(path_part) < 2:
485 msg = _(b"cannot recognize filelog from filename: '%s'")
485 msg = _(b"cannot recognize filelog from filename: '%s'")
486 msg %= path
486 msg %= path
487 raise error.Abort(msg)
487 raise error.Abort(msg)
488
488
489 return path_part[1]
489 return path_part[1]
490
490
491
491
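A worked example of the parsing above, with a hypothetical store path: the extension and the `data/` prefix are dropped, and everything after the first `/` is the filelog name.

path = b'data/foo/bar.txt.i'                       # hypothetical index path
path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
# path_part == [b'data', b'foo/bar.txt']
assert path_part[1] == b'foo/bar.txt'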
492 def _filelog_from_filename(repo, path):
492 def _filelog_from_filename(repo, path):
493 """Returns the filelog for the given `path`. Stolen from `engine.py`"""
493 """Returns the filelog for the given `path`. Stolen from `engine.py`"""
494
494
495 from .. import filelog # avoid cycle
495 from .. import filelog # avoid cycle
496
496
497 fl = filelog.filelog(repo.svfs, path)
497 fl = filelog.filelog(repo.svfs, path)
498 return fl
498 return fl
499
499
500
500
501 def _write_swapped_parents(repo, rl, rev, offset, fp):
501 def _write_swapped_parents(repo, rl, rev, offset, fp):
502 """Swaps p1 and p2 and overwrites the revlog entry for `rev` in `fp`"""
502 """Swaps p1 and p2 and overwrites the revlog entry for `rev` in `fp`"""
503 from ..pure import parsers # avoid cycle
503 from ..pure import parsers # avoid cycle
504
504
505 if repo._currentlock(repo._lockref) is None:
505 if repo._currentlock(repo._lockref) is None:
506 # Let's be paranoid about it
506 # Let's be paranoid about it
507 msg = "repo needs to be locked to rewrite parents"
507 msg = "repo needs to be locked to rewrite parents"
508 raise error.ProgrammingError(msg)
508 raise error.ProgrammingError(msg)
509
509
510 index_format = parsers.IndexObject.index_format
510 index_format = parsers.IndexObject.index_format
511 entry = rl.index[rev]
511 entry = rl.index[rev]
512 new_entry = list(entry)
512 new_entry = list(entry)
513 new_entry[5], new_entry[6] = entry[6], entry[5]
513 new_entry[5], new_entry[6] = entry[6], entry[5]
514 packed = index_format.pack(*new_entry[:8])
514 packed = index_format.pack(*new_entry[:8])
515 fp.seek(offset)
515 fp.seek(offset)
516 fp.write(packed)
516 fp.write(packed)
517
517
518
518
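For readers following the byte-level fix: each v1 index entry is a fixed 64-byte struct, and fields 5 and 6 of the unpacked tuple are the two parent revisions. A standalone sketch of the swap, assuming the `>Qiiiiii20s12x` layout (my recollection of the v1 format, not imported from Mercurial):

import struct

INDEX_FORMAT_V1 = struct.Struct(b">Qiiiiii20s12x")  # assumed v1 entry layout

def swap_parents(packed_entry):
    # unpack, exchange p1/p2 (fields 5 and 6), and re-pack the 64-byte entry
    fields = list(INDEX_FORMAT_V1.unpack(packed_entry))
    fields[5], fields[6] = fields[6], fields[5]
    return INDEX_FORMAT_V1.pack(*fields)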
519 def _reorder_filelog_parents(repo, fl, to_fix):
519 def _reorder_filelog_parents(repo, fl, to_fix):
520 """
520 """
521 Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the
521 Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the
522 new version to disk, overwriting the old one with a rename.
522 new version to disk, overwriting the old one with a rename.
523 """
523 """
524 from ..pure import parsers # avoid cycle
524 from ..pure import parsers # avoid cycle
525
525
526 ui = repo.ui
526 ui = repo.ui
527 assert len(to_fix) > 0
527 assert len(to_fix) > 0
528 rl = fl._revlog
528 rl = fl._revlog
529 if rl._format_version != constants.REVLOGV1:
529 if rl._format_version != constants.REVLOGV1:
530 msg = "expected version 1 revlog, got version '%d'" % rl._format_version
530 msg = "expected version 1 revlog, got version '%d'" % rl._format_version
531 raise error.ProgrammingError(msg)
531 raise error.ProgrammingError(msg)
532
532
533 index_file = rl._indexfile
533 index_file = rl._indexfile
534 new_file_path = index_file + b'.tmp-parents-fix'
534 new_file_path = index_file + b'.tmp-parents-fix'
535 repaired_msg = _(b"repaired revision %d of 'filelog %s'\n")
535 repaired_msg = _(b"repaired revision %d of 'filelog %s'\n")
536
536
537 with ui.uninterruptible():
537 with ui.uninterruptible():
538 try:
538 try:
539 util.copyfile(
539 util.copyfile(
540 rl.opener.join(index_file),
540 rl.opener.join(index_file),
541 rl.opener.join(new_file_path),
541 rl.opener.join(new_file_path),
542 checkambig=rl._checkambig,
542 checkambig=rl._checkambig,
543 )
543 )
544
544
545 with rl.opener(new_file_path, mode=b"r+") as fp:
545 with rl.opener(new_file_path, mode=b"r+") as fp:
546 if rl._inline:
546 if rl._inline:
547 index = parsers.InlinedIndexObject(fp.read())
547 index = parsers.InlinedIndexObject(fp.read())
548 for rev in fl.revs():
548 for rev in fl.revs():
549 if rev in to_fix:
549 if rev in to_fix:
550 offset = index._calculate_index(rev)
550 offset = index._calculate_index(rev)
551 _write_swapped_parents(repo, rl, rev, offset, fp)
551 _write_swapped_parents(repo, rl, rev, offset, fp)
552 ui.write(repaired_msg % (rev, index_file))
552 ui.write(repaired_msg % (rev, index_file))
553 else:
553 else:
554 index_format = parsers.IndexObject.index_format
554 index_format = parsers.IndexObject.index_format
555 for rev in to_fix:
555 for rev in to_fix:
556 offset = rev * index_format.size
556 offset = rev * index_format.size
557 _write_swapped_parents(repo, rl, rev, offset, fp)
557 _write_swapped_parents(repo, rl, rev, offset, fp)
558 ui.write(repaired_msg % (rev, index_file))
558 ui.write(repaired_msg % (rev, index_file))
559
559
560 rl.opener.rename(new_file_path, index_file)
560 rl.opener.rename(new_file_path, index_file)
561 rl.clearcaches()
561 rl.clearcaches()
562 rl._loadindex()
562 rl._loadindex()
563 finally:
563 finally:
564 util.tryunlink(new_file_path)
564 util.tryunlink(new_file_path)
565
565
566
566
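The function above follows a copy, patch-in-place, then rename-over pattern so a crash mid-rewrite never leaves a half-written index behind. A generic stdlib sketch of the same pattern (plain `os`/`shutil`, not the vfs and `checkambig` machinery Mercurial uses):

import os
import shutil

def patch_file_atomically(path, patches):
    """Apply (offset, data) patches to a copy, then atomically replace `path`."""
    tmp = path + ".tmp-fix"
    shutil.copyfile(path, tmp)
    try:
        with open(tmp, "r+b") as fp:
            for offset, data in patches:
                fp.seek(offset)
                fp.write(data)
        os.replace(tmp, path)  # atomic rename on POSIX
    finally:
        if os.path.exists(tmp):
            os.unlink(tmp)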
567 def _is_revision_affected(fl, filerev, metadata_cache=None):
567 def _is_revision_affected(fl, filerev, metadata_cache=None):
568 full_text = lambda: fl._revlog.rawdata(filerev)
568 full_text = lambda: fl._revlog.rawdata(filerev)
569 parent_revs = lambda: fl._revlog.parentrevs(filerev)
569 parent_revs = lambda: fl._revlog.parentrevs(filerev)
570 return _is_revision_affected_inner(
570 return _is_revision_affected_inner(
571 full_text, parent_revs, filerev, metadata_cache
571 full_text, parent_revs, filerev, metadata_cache
572 )
572 )
573
573
574
574
575 def _is_revision_affected_inner(
575 def _is_revision_affected_inner(
576 full_text,
576 full_text,
577 parents_revs,
577 parents_revs,
578 filerev,
578 filerev,
579 metadata_cache=None,
579 metadata_cache=None,
580 ):
580 ):
581 """Mercurial currently (5.9rc0) uses `p1 == nullrev and p2 != nullrev` as a
581 """Mercurial currently (5.9rc0) uses `p1 == nullrev and p2 != nullrev` as a
582 special meaning compared to the reverse in the context of filelog-based
582 special meaning compared to the reverse in the context of filelog-based
583 copytracing. issue6528 exists because new code assumed that parent ordering
583 copytracing. issue6528 exists because new code assumed that parent ordering
584 didn't matter, so this detects if the revision contains metadata (since
584 didn't matter, so this detects if the revision contains metadata (since
585 it's only used for filelog-based copytracing) and its parents are in the
585 it's only used for filelog-based copytracing) and its parents are in the
586 "wrong" order."""
586 "wrong" order."""
587 try:
587 try:
588 raw_text = full_text()
588 raw_text = full_text()
589 except error.CensoredNodeError:
589 except error.CensoredNodeError:
590 # We don't care about censored nodes as they never carry metadata
590 # We don't care about censored nodes as they never carry metadata
591 return False
591 return False
592
592
593 # raw text can be a `memoryview`, which doesn't implement `startswith`
593 # raw text can be a `memoryview`, which doesn't implement `startswith`
594 has_meta = bytes(raw_text[:2]) == b'\x01\n'
594 has_meta = bytes(raw_text[:2]) == b'\x01\n'
595 if metadata_cache is not None:
595 if metadata_cache is not None:
596 metadata_cache[filerev] = has_meta
596 metadata_cache[filerev] = has_meta
597 if has_meta:
597 if has_meta:
598 (p1, p2) = parents_revs()
598 (p1, p2) = parents_revs()
599 if p1 != nullrev and p2 == nullrev:
599 if p1 != nullrev and p2 == nullrev:
600 return True
600 return True
601 return False
601 return False
602
602
603
603
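The slow-path check above boils down to two facts: filelog metadata always starts with the two-byte marker `b'\x01\n'`, and an affected revision is one that carries such metadata while its parents sit in the p1-set/p2-null order. A self-contained restatement (constants inlined for illustration; `nullrev` is `-1` in Mercurial):

NULLREV = -1             # Mercurial's nullrev sentinel
META_MARKER = b'\x01\n'  # start of a filelog metadata block

def is_affected(raw_text, p1, p2):
    # metadata present and parents in the "wrong" order for copytracing
    has_meta = bytes(raw_text[:2]) == META_MARKER
    return has_meta and p1 != NULLREV and p2 == NULLREV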
604 def _is_revision_affected_fast(repo, fl, filerev, metadata_cache):
604 def _is_revision_affected_fast(repo, fl, filerev, metadata_cache):
605 rl = fl._revlog
605 rl = fl._revlog
606 is_censored = lambda: rl.iscensored(filerev)
606 is_censored = lambda: rl.iscensored(filerev)
607 delta_base = lambda: rl.deltaparent(filerev)
607 delta_base = lambda: rl.deltaparent(filerev)
608 delta = lambda: rl._chunk(filerev)
608 delta = lambda: rl._chunk(filerev)
609 full_text = lambda: rl.rawdata(filerev)
609 full_text = lambda: rl.rawdata(filerev)
610 parent_revs = lambda: rl.parentrevs(filerev)
610 parent_revs = lambda: rl.parentrevs(filerev)
611 return _is_revision_affected_fast_inner(
611 return _is_revision_affected_fast_inner(
612 is_censored,
612 is_censored,
613 delta_base,
613 delta_base,
614 delta,
614 delta,
615 full_text,
615 full_text,
616 parent_revs,
616 parent_revs,
617 filerev,
617 filerev,
618 metadata_cache,
618 metadata_cache,
619 )
619 )
620
620
621
621
622 def _is_revision_affected_fast_inner(
622 def _is_revision_affected_fast_inner(
623 is_censored,
623 is_censored,
624 delta_base,
624 delta_base,
625 delta,
625 delta,
626 full_text,
626 full_text,
627 parent_revs,
627 parent_revs,
628 filerev,
628 filerev,
629 metadata_cache,
629 metadata_cache,
630 ):
630 ):
631 """Optimization fast-path for `_is_revision_affected`.
631 """Optimization fast-path for `_is_revision_affected`.
632
632
633 `metadata_cache` is a dict of `{rev: has_metadata}` which allows any
633 `metadata_cache` is a dict of `{rev: has_metadata}` which allows any
634 revision to check if its base has metadata, saving the computation of the full
634 revision to check if its base has metadata, saving the computation of the full
635 text by looking at the current delta instead.
635 text by looking at the current delta instead.
636
636
637 This optimization only works if the revisions are looked at in order."""
637 This optimization only works if the revisions are looked at in order."""
638
638
639 if is_censored():
639 if is_censored():
640 # Censored revisions don't contain metadata, so they cannot be affected
640 # Censored revisions don't contain metadata, so they cannot be affected
641 metadata_cache[filerev] = False
641 metadata_cache[filerev] = False
642 return False
642 return False
643
643
644 p1, p2 = parent_revs()
644 p1, p2 = parent_revs()
645 if p1 == nullrev or p2 != nullrev:
645 if p1 == nullrev or p2 != nullrev:
646 return False
646 return False
647
647
648 delta_parent = delta_base()
648 delta_parent = delta_base()
649 parent_has_metadata = metadata_cache.get(delta_parent)
649 parent_has_metadata = metadata_cache.get(delta_parent)
650 if parent_has_metadata is None:
650 if parent_has_metadata is None:
651 return _is_revision_affected_inner(
651 return _is_revision_affected_inner(
652 full_text,
652 full_text,
653 parent_revs,
653 parent_revs,
654 filerev,
654 filerev,
655 metadata_cache,
655 metadata_cache,
656 )
656 )
657
657
658 chunk = delta()
658 chunk = delta()
659 if not len(chunk):
659 if not len(chunk):
660 # No diff for this revision
660 # No diff for this revision
661 return parent_has_metadata
661 return parent_has_metadata
662
662
663 header_length = 12
663 header_length = 12
664 if len(chunk) < header_length:
664 if len(chunk) < header_length:
665 raise error.Abort(_(b"patch cannot be decoded"))
665 raise error.Abort(_(b"patch cannot be decoded"))
666
666
667 start, _end, _length = struct.unpack(b">lll", chunk[:header_length])
667 start, _end, _length = struct.unpack(b">lll", chunk[:header_length])
668
668
669 if start < 2: # len(b'\x01\n') == 2
669 if start < 2: # len(b'\x01\n') == 2
670 # This delta does *something* to the metadata marker (if any).
670 # This delta does *something* to the metadata marker (if any).
671 # Check it the slow way
671 # Check it the slow way
672 is_affected = _is_revision_affected_inner(
672 is_affected = _is_revision_affected_inner(
673 full_text,
673 full_text,
674 parent_revs,
674 parent_revs,
675 filerev,
675 filerev,
676 metadata_cache,
676 metadata_cache,
677 )
677 )
678 return is_affected
678 return is_affected
679
679
680 # The diff did not remove or add the metadata header, so the revision is in
680 # The diff did not remove or add the metadata header, so the revision is in
681 # the same situation as its parent
681 # the same situation as its parent
682 metadata_cache[filerev] = parent_has_metadata
682 metadata_cache[filerev] = parent_has_metadata
683 return parent_has_metadata
683 return parent_has_metadata
684
684
685
685
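The fast path above only needs the first hunk header of the stored delta: Mercurial binary deltas are sequences of `(start, end, length, data)` hunks whose 12-byte header is packed as `>lll`, so a hunk starting before offset 2 may touch the metadata marker. A sketch of just that test, mirroring the code above:

import struct

def delta_may_touch_metadata_marker(chunk):
    if not chunk:
        return False  # empty delta: same metadata situation as the base
    if len(chunk) < 12:
        raise ValueError("patch cannot be decoded")
    start, _end, _length = struct.unpack(b">lll", chunk[:12])
    return start < 2  # len(b'\x01\n') == 2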
686 def _from_report(ui, repo, context, from_report, dry_run):
686 def _from_report(ui, repo, context, from_report, dry_run):
687 """
687 """
688 Fix the revisions given in the `from_report` file, but still check if the
688 Fix the revisions given in the `from_report` file, but still check if the
689 revisions are indeed affected, to prevent an unfortunate cyclic situation
689 revisions are indeed affected, to prevent an unfortunate cyclic situation
690 where we'd swap well-ordered parents again.
690 where we'd swap well-ordered parents again.
691
691
692 See the doc for `debug_fix_issue6528` for the format documentation.
692 See the doc for `debug_fix_issue6528` for the format documentation.
693 """
693 """
694 ui.write(_(b"loading report file '%s'\n") % from_report)
694 ui.write(_(b"loading report file '%s'\n") % from_report)
695
695
696 with context(), open(from_report, mode='rb') as f:
696 with context(), open(from_report, mode='rb') as f:
697 for line in f.read().split(b'\n'):
697 for line in f.read().split(b'\n'):
698 if not line:
698 if not line:
699 continue
699 continue
700 filenodes, filename = line.split(b' ', 1)
700 filenodes, filename = line.split(b' ', 1)
701 fl = _filelog_from_filename(repo, filename)
701 fl = _filelog_from_filename(repo, filename)
702 to_fix = set(
702 to_fix = set(
703 fl.rev(binascii.unhexlify(n)) for n in filenodes.split(b',')
703 fl.rev(binascii.unhexlify(n)) for n in filenodes.split(b',')
704 )
704 )
705 excluded = set()
705 excluded = set()
706
706
707 for filerev in to_fix:
707 for filerev in to_fix:
708 if _is_revision_affected(fl, filerev):
708 if _is_revision_affected(fl, filerev):
709 msg = b"found affected revision %d for filelog '%s'\n"
709 msg = b"found affected revision %d for filelog '%s'\n"
710 ui.warn(msg % (filerev, filename))
710 ui.warn(msg % (filerev, filename))
711 else:
711 else:
712 msg = _(b"revision %s of file '%s' is not affected\n")
712 msg = _(b"revision %s of file '%s' is not affected\n")
713 msg %= (binascii.hexlify(fl.node(filerev)), filename)
713 msg %= (binascii.hexlify(fl.node(filerev)), filename)
714 ui.warn(msg)
714 ui.warn(msg)
715 excluded.add(filerev)
715 excluded.add(filerev)
716
716
717 to_fix = to_fix - excluded
717 to_fix = to_fix - excluded
718 if not to_fix:
718 if not to_fix:
719 msg = _(b"no affected revisions were found for '%s'\n")
719 msg = _(b"no affected revisions were found for '%s'\n")
720 ui.write(msg % filename)
720 ui.write(msg % filename)
721 continue
721 continue
722 if not dry_run:
722 if not dry_run:
723 _reorder_filelog_parents(repo, fl, sorted(to_fix))
723 _reorder_filelog_parents(repo, fl, sorted(to_fix))
724
724
725
725
726 def filter_delta_issue6528(revlog, deltas_iter):
726 def filter_delta_issue6528(revlog, deltas_iter):
727 """filter incomind deltas to repaire issue 6528 on the fly"""
727 """filter incomind deltas to repaire issue 6528 on the fly"""
728 metadata_cache = {}
728 metadata_cache = {}
729
729
730 deltacomputer = deltas.deltacomputer(revlog)
730 deltacomputer = deltas.deltacomputer(revlog)
731
731
732 for rev, d in enumerate(deltas_iter, len(revlog)):
732 for rev, d in enumerate(deltas_iter, len(revlog)):
733 (
733 (
734 node,
734 node,
735 p1_node,
735 p1_node,
736 p2_node,
736 p2_node,
737 linknode,
737 linknode,
738 deltabase,
738 deltabase,
739 delta,
739 delta,
740 flags,
740 flags,
741 sidedata,
741 sidedata,
742 ) = d
742 ) = d
743
743
744 if not revlog.index.has_node(deltabase):
744 if not revlog.index.has_node(deltabase):
745 raise error.LookupError(
745 raise error.LookupError(
746 deltabase, revlog.radix, _(b'unknown parent')
746 deltabase, revlog.radix, _(b'unknown parent')
747 )
747 )
748 base_rev = revlog.rev(deltabase)
748 base_rev = revlog.rev(deltabase)
749 if not revlog.index.has_node(p1_node):
749 if not revlog.index.has_node(p1_node):
750 raise error.LookupError(p1_node, revlog.radix, _(b'unknown parent'))
750 raise error.LookupError(p1_node, revlog.radix, _(b'unknown parent'))
751 p1_rev = revlog.rev(p1_node)
751 p1_rev = revlog.rev(p1_node)
752 if not revlog.index.has_node(p2_node):
752 if not revlog.index.has_node(p2_node):
753 raise error.LookupError(p2_node, revlog.radix, _(b'unknown parent'))
753 raise error.LookupError(p2_node, revlog.radix, _(b'unknown parent'))
754 p2_rev = revlog.rev(p2_node)
754 p2_rev = revlog.rev(p2_node)
755
755
756 is_censored = lambda: bool(flags & REVIDX_ISCENSORED)
756 is_censored = lambda: bool(flags & REVIDX_ISCENSORED)
757 delta_base = lambda: revlog.rev(delta_base)
757 delta_base = lambda: revlog.rev(delta_base)
758 delta_base = lambda: base_rev
758 delta_base = lambda: base_rev
759 parent_revs = lambda: (p1_rev, p2_rev)
759 parent_revs = lambda: (p1_rev, p2_rev)
760
760
761 def full_text():
761 def full_text():
762 # note: being able to reuse the full text computation in the
762 # note: being able to reuse the full text computation in the
763 # underlying addrevision would be useful; however, this is a bit too
763 # underlying addrevision would be useful; however, this is a bit too
764 # intrusive for the "quick" issue6528 fix we are writing before the
764 # intrusive for the "quick" issue6528 fix we are writing before the
765 # 5.8 release
765 # 5.8 release
766 textlen = mdiff.patchedsize(revlog.size(base_rev), delta)
766 textlen = mdiff.patchedsize(revlog.size(base_rev), delta)
767
767
768 revinfo = revlogutils.revisioninfo(
768 revinfo = revlogutils.revisioninfo(
769 node,
769 node,
770 p1_node,
770 p1_node,
771 p2_node,
771 p2_node,
772 [None],
772 [None],
773 textlen,
773 textlen,
774 (base_rev, delta),
774 (base_rev, delta),
775 flags,
775 flags,
776 )
776 )
777 # cached by the global "writing" context
777 return deltacomputer.buildtext(revinfo)
778 assert revlog._writinghandles is not None
779 if revlog._inline:
780 fh = revlog._writinghandles[0]
781 else:
782 fh = revlog._writinghandles[1]
783 return deltacomputer.buildtext(revinfo, fh)
784
778
785 is_affected = _is_revision_affected_fast_inner(
779 is_affected = _is_revision_affected_fast_inner(
786 is_censored,
780 is_censored,
787 delta_base,
781 delta_base,
788 lambda: delta,
782 lambda: delta,
789 full_text,
783 full_text,
790 parent_revs,
784 parent_revs,
791 rev,
785 rev,
792 metadata_cache,
786 metadata_cache,
793 )
787 )
794 if is_affected:
788 if is_affected:
795 d = (
789 d = (
796 node,
790 node,
797 p2_node,
791 p2_node,
798 p1_node,
792 p1_node,
799 linknode,
793 linknode,
800 deltabase,
794 deltabase,
801 delta,
795 delta,
802 flags,
796 flags,
803 sidedata,
797 sidedata,
804 )
798 )
805 yield d
799 yield d
806
800
807
801
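The repair performed by the filter above is deliberately small: an incoming changegroup delta is an 8-tuple and, when the revision is detected as affected, it is re-emitted with the two parent nodes exchanged. In isolation (descriptive field names, not an actual Mercurial type):

def swap_delta_parents(d):
    # re-emit the delta tuple with p1 and p2 exchanged
    node, p1, p2, linknode, deltabase, delta, flags, sidedata = d
    return (node, p2, p1, linknode, deltabase, delta, flags, sidedata)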
808 def repair_issue6528(
802 def repair_issue6528(
809 ui, repo, dry_run=False, to_report=None, from_report=None, paranoid=False
803 ui, repo, dry_run=False, to_report=None, from_report=None, paranoid=False
810 ):
804 ):
811 @contextlib.contextmanager
805 @contextlib.contextmanager
812 def context():
806 def context():
813 if dry_run or to_report: # No need for locking
807 if dry_run or to_report: # No need for locking
814 yield
808 yield
815 else:
809 else:
816 with repo.wlock(), repo.lock():
810 with repo.wlock(), repo.lock():
817 yield
811 yield
818
812
819 if from_report:
813 if from_report:
820 return _from_report(ui, repo, context, from_report, dry_run)
814 return _from_report(ui, repo, context, from_report, dry_run)
821
815
822 report_entries = []
816 report_entries = []
823
817
824 with context():
818 with context():
825 files = list(
819 files = list(
826 entry
820 entry
827 for entry in repo.store.data_entries()
821 for entry in repo.store.data_entries()
828 if entry.is_revlog and entry.is_filelog
822 if entry.is_revlog and entry.is_filelog
829 )
823 )
830
824
831 progress = ui.makeprogress(
825 progress = ui.makeprogress(
832 _(b"looking for affected revisions"),
826 _(b"looking for affected revisions"),
833 unit=_(b"filelogs"),
827 unit=_(b"filelogs"),
834 total=len(files),
828 total=len(files),
835 )
829 )
836 found_nothing = True
830 found_nothing = True
837
831
838 for entry in files:
832 for entry in files:
839 progress.increment()
833 progress.increment()
840 filename = entry.target_id
834 filename = entry.target_id
841 fl = _filelog_from_filename(repo, entry.target_id)
835 fl = _filelog_from_filename(repo, entry.target_id)
842
836
843 # Set of filerevs (or hex filenodes if `to_report`) that need fixing
837 # Set of filerevs (or hex filenodes if `to_report`) that need fixing
844 to_fix = set()
838 to_fix = set()
845 metadata_cache = {}
839 metadata_cache = {}
846 for filerev in fl.revs():
840 for filerev in fl.revs():
847 affected = _is_revision_affected_fast(
841 affected = _is_revision_affected_fast(
848 repo, fl, filerev, metadata_cache
842 repo, fl, filerev, metadata_cache
849 )
843 )
850 if paranoid:
844 if paranoid:
851 slow = _is_revision_affected(fl, filerev)
845 slow = _is_revision_affected(fl, filerev)
852 if slow != affected:
846 if slow != affected:
853 msg = _(b"paranoid check failed for '%s' at node %s")
847 msg = _(b"paranoid check failed for '%s' at node %s")
854 node = binascii.hexlify(fl.node(filerev))
848 node = binascii.hexlify(fl.node(filerev))
855 raise error.Abort(msg % (filename, node))
849 raise error.Abort(msg % (filename, node))
856 if affected:
850 if affected:
857 msg = b"found affected revision %d for file '%s'\n"
851 msg = b"found affected revision %d for file '%s'\n"
858 ui.warn(msg % (filerev, filename))
852 ui.warn(msg % (filerev, filename))
859 found_nothing = False
853 found_nothing = False
860 if not dry_run:
854 if not dry_run:
861 if to_report:
855 if to_report:
862 to_fix.add(binascii.hexlify(fl.node(filerev)))
856 to_fix.add(binascii.hexlify(fl.node(filerev)))
863 else:
857 else:
864 to_fix.add(filerev)
858 to_fix.add(filerev)
865
859
866 if to_fix:
860 if to_fix:
867 to_fix = sorted(to_fix)
861 to_fix = sorted(to_fix)
868 if to_report:
862 if to_report:
869 report_entries.append((filename, to_fix))
863 report_entries.append((filename, to_fix))
870 else:
864 else:
871 _reorder_filelog_parents(repo, fl, to_fix)
865 _reorder_filelog_parents(repo, fl, to_fix)
872
866
873 if found_nothing:
867 if found_nothing:
874 ui.write(_(b"no affected revisions were found\n"))
868 ui.write(_(b"no affected revisions were found\n"))
875
869
876 if to_report and report_entries:
870 if to_report and report_entries:
877 with open(to_report, mode="wb") as f:
871 with open(to_report, mode="wb") as f:
878 for path, to_fix in report_entries:
872 for path, to_fix in report_entries:
879 f.write(b"%s %s\n" % (b",".join(to_fix), path))
873 f.write(b"%s %s\n" % (b",".join(to_fix), path))
880
874
881 progress.complete()
875 progress.complete()
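For reference, the `--to-report` file written above (and read back by `_from_report`) uses one line per filelog: comma-separated hex filenodes, a space, then the file path. A hypothetical line built the same way as in the writing loop above:

to_fix = [b"aa" * 20, b"bb" * 20]   # hypothetical 40-character hex filenodes
path = b"dir/file.txt"              # hypothetical file path
line = b"%s %s\n" % (b",".join(to_fix), path)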