revlog: move `revisioninfo` in `revlogutils`...
marmoute
r48191:34cc102c default
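This changeset deletes the `_revisioninfo` attrs class from `mercurial/revlog.py`; per the commit message, the class now lives in the `revlogutils` package. For reference, here is a sketch of the relocated class, mirroring the attributes of the definition deleted below; the exact destination module and final name (assumed here to be `revisioninfo`, without the leading underscore, in `mercurial/revlogutils/__init__.py`) are inferred from the commit message, since this hunk only shows the removal.

```python
# Sketch of the relocated class (assumed destination:
# mercurial/revlogutils/__init__.py, assumed name: `revisioninfo`).
# The attributes mirror the `_revisioninfo` class deleted below; inside
# Mercurial the vendored `from .thirdparty import attr` would be used.
import attr


@attr.s(slots=True, frozen=True)
class revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()
```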
@@ -1,3401 +1,3388 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    censor,
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    revlogv0,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


-@attr.s(slots=True, frozen=True)
-class _revisioninfo(object):
-    """Information about a revision that allows building its fulltext
-    node:       expected hash of the revision
-    p1, p2:     parent revs of the revision
-    btext:      built text cache consisting of a one-element list
-    cachedelta: (baserev, uncompressed_delta) or None
-    flags:      flags associated to the revision storage
-
-    One of btext[0] or cachedelta must be set.
-    """
-
-    node = attr.ib()
-    p1 = attr.ib()
-    p2 = attr.ib()
-    btext = attr.ib()
-    textlen = attr.ib()
-    cachedelta = attr.ib()
-    flags = attr.ib()
-
-
191 @interfaceutil.implementer(repository.irevisiondelta)
170 @interfaceutil.implementer(repository.irevisiondelta)
192 @attr.s(slots=True)
171 @attr.s(slots=True)
193 class revlogrevisiondelta(object):
172 class revlogrevisiondelta(object):
194 node = attr.ib()
173 node = attr.ib()
195 p1node = attr.ib()
174 p1node = attr.ib()
196 p2node = attr.ib()
175 p2node = attr.ib()
197 basenode = attr.ib()
176 basenode = attr.ib()
198 flags = attr.ib()
177 flags = attr.ib()
199 baserevisionsize = attr.ib()
178 baserevisionsize = attr.ib()
200 revision = attr.ib()
179 revision = attr.ib()
201 delta = attr.ib()
180 delta = attr.ib()
202 sidedata = attr.ib()
181 sidedata = attr.ib()
203 protocol_flags = attr.ib()
182 protocol_flags = attr.ib()
204 linknode = attr.ib(default=None)
183 linknode = attr.ib(default=None)
205
184
206
185
207 @interfaceutil.implementer(repository.iverifyproblem)
186 @interfaceutil.implementer(repository.iverifyproblem)
208 @attr.s(frozen=True)
187 @attr.s(frozen=True)
209 class revlogproblem(object):
188 class revlogproblem(object):
210 warning = attr.ib(default=None)
189 warning = attr.ib(default=None)
211 error = attr.ib(default=None)
190 error = attr.ib(default=None)
212 node = attr.ib(default=None)
191 node = attr.ib(default=None)
213
192
214
193
215 def parse_index_v1(data, inline):
194 def parse_index_v1(data, inline):
216 # call the C implementation to parse the index data
195 # call the C implementation to parse the index data
217 index, cache = parsers.parse_index2(data, inline)
196 index, cache = parsers.parse_index2(data, inline)
218 return index, cache
197 return index, cache
219
198
220
199
221 def parse_index_v2(data, inline):
200 def parse_index_v2(data, inline):
222 # call the C implementation to parse the index data
201 # call the C implementation to parse the index data
223 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
202 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
224 return index, cache
203 return index, cache
225
204
226
205
227 def parse_index_cl_v2(data, inline):
206 def parse_index_cl_v2(data, inline):
228 # call the C implementation to parse the index data
207 # call the C implementation to parse the index data
229 assert not inline
208 assert not inline
230 from .pure.parsers import parse_index_cl_v2
209 from .pure.parsers import parse_index_cl_v2
231
210
232 index, cache = parse_index_cl_v2(data)
211 index, cache = parse_index_cl_v2(data)
233 return index, cache
212 return index, cache
234
213
235
214
236 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
215 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
237
216
238 def parse_index_v1_nodemap(data, inline):
217 def parse_index_v1_nodemap(data, inline):
239 index, cache = parsers.parse_index_devel_nodemap(data, inline)
218 index, cache = parsers.parse_index_devel_nodemap(data, inline)
240 return index, cache
219 return index, cache
241
220
242
221
243 else:
222 else:
244 parse_index_v1_nodemap = None
223 parse_index_v1_nodemap = None
245
224
246
225
247 def parse_index_v1_mixed(data, inline):
226 def parse_index_v1_mixed(data, inline):
248 index, cache = parse_index_v1(data, inline)
227 index, cache = parse_index_v1(data, inline)
249 return rustrevlog.MixedIndex(index), cache
228 return rustrevlog.MixedIndex(index), cache
250
229
251
230
252 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
231 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
253 # signed integer)
232 # signed integer)
254 _maxentrysize = 0x7FFFFFFF
233 _maxentrysize = 0x7FFFFFFF
255
234
256 PARTIAL_READ_MSG = _(
235 PARTIAL_READ_MSG = _(
257 b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
236 b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
258 )
237 )
259
238
260 FILE_TOO_SHORT_MSG = _(
239 FILE_TOO_SHORT_MSG = _(
261 b'cannot read from revlog %s;'
240 b'cannot read from revlog %s;'
262 b' expected %d bytes from offset %d, data size is %d'
241 b' expected %d bytes from offset %d, data size is %d'
263 )
242 )
264
243
265
244
266 class revlog(object):
245 class revlog(object):
267 """
246 """
268 the underlying revision storage object
247 the underlying revision storage object
269
248
270 A revlog consists of two parts, an index and the revision data.
249 A revlog consists of two parts, an index and the revision data.
271
250
272 The index is a file with a fixed record size containing
251 The index is a file with a fixed record size containing
273 information on each revision, including its nodeid (hash), the
252 information on each revision, including its nodeid (hash), the
274 nodeids of its parents, the position and offset of its data within
253 nodeids of its parents, the position and offset of its data within
275 the data file, and the revision it's based on. Finally, each entry
254 the data file, and the revision it's based on. Finally, each entry
276 contains a linkrev entry that can serve as a pointer to external
255 contains a linkrev entry that can serve as a pointer to external
277 data.
256 data.
278
257
279 The revision data itself is a linear collection of data chunks.
258 The revision data itself is a linear collection of data chunks.
280 Each chunk represents a revision and is usually represented as a
259 Each chunk represents a revision and is usually represented as a
281 delta against the previous chunk. To bound lookup time, runs of
260 delta against the previous chunk. To bound lookup time, runs of
282 deltas are limited to about 2 times the length of the original
261 deltas are limited to about 2 times the length of the original
283 version data. This makes retrieval of a version proportional to
262 version data. This makes retrieval of a version proportional to
284 its size, or O(1) relative to the number of revisions.
263 its size, or O(1) relative to the number of revisions.
285
264
286 Both pieces of the revlog are written to in an append-only
265 Both pieces of the revlog are written to in an append-only
287 fashion, which means we never need to rewrite a file to insert or
266 fashion, which means we never need to rewrite a file to insert or
288 remove data, and can use some simple techniques to avoid the need
267 remove data, and can use some simple techniques to avoid the need
289 for locking while reading.
268 for locking while reading.
290
269
291 If checkambig, indexfile is opened with checkambig=True at
270 If checkambig, indexfile is opened with checkambig=True at
292 writing, to avoid file stat ambiguity.
271 writing, to avoid file stat ambiguity.
293
272
294 If mmaplargeindex is True, and an mmapindexthreshold is set, the
273 If mmaplargeindex is True, and an mmapindexthreshold is set, the
295 index will be mmapped rather than read if it is larger than the
274 index will be mmapped rather than read if it is larger than the
296 configured threshold.
275 configured threshold.
297
276
298 If censorable is True, the revlog can have censored revisions.
277 If censorable is True, the revlog can have censored revisions.
299
278
300 If `upperboundcomp` is not None, this is the expected maximal gain from
279 If `upperboundcomp` is not None, this is the expected maximal gain from
301 compression for the data content.
280 compression for the data content.
302
281
303 `concurrencychecker` is an optional function that receives 3 arguments: a
282 `concurrencychecker` is an optional function that receives 3 arguments: a
304 file handle, a filename, and an expected position. It should check whether
283 file handle, a filename, and an expected position. It should check whether
305 the current position in the file handle is valid, and log/warn/fail (by
284 the current position in the file handle is valid, and log/warn/fail (by
306 raising).
285 raising).
307
286
308 See mercurial/revlogutils/contants.py for details about the content of an
287 See mercurial/revlogutils/contants.py for details about the content of an
309 index entry.
288 index entry.
310 """
289 """
311
290
312 _flagserrorclass = error.RevlogError
291 _flagserrorclass = error.RevlogError
313
292
314 def __init__(
293 def __init__(
315 self,
294 self,
316 opener,
295 opener,
317 target,
296 target,
318 radix,
297 radix,
319 postfix=None, # only exist for `tmpcensored` now
298 postfix=None, # only exist for `tmpcensored` now
320 checkambig=False,
299 checkambig=False,
321 mmaplargeindex=False,
300 mmaplargeindex=False,
322 censorable=False,
301 censorable=False,
323 upperboundcomp=None,
302 upperboundcomp=None,
324 persistentnodemap=False,
303 persistentnodemap=False,
325 concurrencychecker=None,
304 concurrencychecker=None,
326 trypending=False,
305 trypending=False,
327 ):
306 ):
328 """
307 """
329 create a revlog object
308 create a revlog object
330
309
331 opener is a function that abstracts the file opening operation
310 opener is a function that abstracts the file opening operation
332 and can be used to implement COW semantics or the like.
311 and can be used to implement COW semantics or the like.
333
312
334 `target`: a (KIND, ID) tuple that identify the content stored in
313 `target`: a (KIND, ID) tuple that identify the content stored in
335 this revlog. It help the rest of the code to understand what the revlog
314 this revlog. It help the rest of the code to understand what the revlog
336 is about without having to resort to heuristic and index filename
315 is about without having to resort to heuristic and index filename
337 analysis. Note: that this must be reliably be set by normal code, but
316 analysis. Note: that this must be reliably be set by normal code, but
338 that test, debug, or performance measurement code might not set this to
317 that test, debug, or performance measurement code might not set this to
339 accurate value.
318 accurate value.
340 """
319 """
341 self.upperboundcomp = upperboundcomp
320 self.upperboundcomp = upperboundcomp
342
321
343 self.radix = radix
322 self.radix = radix
344
323
345 self._docket_file = None
324 self._docket_file = None
346 self._indexfile = None
325 self._indexfile = None
347 self._datafile = None
326 self._datafile = None
348 self._sidedatafile = None
327 self._sidedatafile = None
349 self._nodemap_file = None
328 self._nodemap_file = None
350 self.postfix = postfix
329 self.postfix = postfix
351 self._trypending = trypending
330 self._trypending = trypending
352 self.opener = opener
331 self.opener = opener
353 if persistentnodemap:
332 if persistentnodemap:
354 self._nodemap_file = nodemaputil.get_nodemap_file(self)
333 self._nodemap_file = nodemaputil.get_nodemap_file(self)
355
334
356 assert target[0] in ALL_KINDS
335 assert target[0] in ALL_KINDS
357 assert len(target) == 2
336 assert len(target) == 2
358 self.target = target
337 self.target = target
359 # When True, indexfile is opened with checkambig=True at writing, to
338 # When True, indexfile is opened with checkambig=True at writing, to
360 # avoid file stat ambiguity.
339 # avoid file stat ambiguity.
361 self._checkambig = checkambig
340 self._checkambig = checkambig
362 self._mmaplargeindex = mmaplargeindex
341 self._mmaplargeindex = mmaplargeindex
363 self._censorable = censorable
342 self._censorable = censorable
364 # 3-tuple of (node, rev, text) for a raw revision.
343 # 3-tuple of (node, rev, text) for a raw revision.
365 self._revisioncache = None
344 self._revisioncache = None
366 # Maps rev to chain base rev.
345 # Maps rev to chain base rev.
367 self._chainbasecache = util.lrucachedict(100)
346 self._chainbasecache = util.lrucachedict(100)
368 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
347 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
369 self._chunkcache = (0, b'')
348 self._chunkcache = (0, b'')
370 # How much data to read and cache into the raw revlog data cache.
349 # How much data to read and cache into the raw revlog data cache.
371 self._chunkcachesize = 65536
350 self._chunkcachesize = 65536
372 self._maxchainlen = None
351 self._maxchainlen = None
373 self._deltabothparents = True
352 self._deltabothparents = True
374 self.index = None
353 self.index = None
375 self._docket = None
354 self._docket = None
376 self._nodemap_docket = None
355 self._nodemap_docket = None
377 # Mapping of partial identifiers to full nodes.
356 # Mapping of partial identifiers to full nodes.
378 self._pcache = {}
357 self._pcache = {}
379 # Mapping of revision integer to full node.
358 # Mapping of revision integer to full node.
380 self._compengine = b'zlib'
359 self._compengine = b'zlib'
381 self._compengineopts = {}
360 self._compengineopts = {}
382 self._maxdeltachainspan = -1
361 self._maxdeltachainspan = -1
383 self._withsparseread = False
362 self._withsparseread = False
384 self._sparserevlog = False
363 self._sparserevlog = False
385 self.hassidedata = False
364 self.hassidedata = False
386 self._srdensitythreshold = 0.50
365 self._srdensitythreshold = 0.50
387 self._srmingapsize = 262144
366 self._srmingapsize = 262144
388
367
389 # Make copy of flag processors so each revlog instance can support
368 # Make copy of flag processors so each revlog instance can support
390 # custom flags.
369 # custom flags.
391 self._flagprocessors = dict(flagutil.flagprocessors)
370 self._flagprocessors = dict(flagutil.flagprocessors)
392
371
393 # 3-tuple of file handles being used for active writing.
372 # 3-tuple of file handles being used for active writing.
394 self._writinghandles = None
373 self._writinghandles = None
395 # prevent nesting of addgroup
374 # prevent nesting of addgroup
396 self._adding_group = None
375 self._adding_group = None
397
376
398 self._loadindex()
377 self._loadindex()
399
378
400 self._concurrencychecker = concurrencychecker
379 self._concurrencychecker = concurrencychecker
401
380
402 def _init_opts(self):
381 def _init_opts(self):
403 """process options (from above/config) to setup associated default revlog mode
382 """process options (from above/config) to setup associated default revlog mode
404
383
405 These values might be affected when actually reading on disk information.
384 These values might be affected when actually reading on disk information.
406
385
407 The relevant values are returned for use in _loadindex().
386 The relevant values are returned for use in _loadindex().
408
387
409 * newversionflags:
388 * newversionflags:
410 version header to use if we need to create a new revlog
389 version header to use if we need to create a new revlog
411
390
412 * mmapindexthreshold:
391 * mmapindexthreshold:
413 minimal index size for start to use mmap
392 minimal index size for start to use mmap
414
393
415 * force_nodemap:
394 * force_nodemap:
416 force the usage of a "development" version of the nodemap code
395 force the usage of a "development" version of the nodemap code
417 """
396 """
418 mmapindexthreshold = None
397 mmapindexthreshold = None
419 opts = self.opener.options
398 opts = self.opener.options
420
399
421 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
400 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
422 new_header = CHANGELOGV2
401 new_header = CHANGELOGV2
423 elif b'revlogv2' in opts:
402 elif b'revlogv2' in opts:
424 new_header = REVLOGV2
403 new_header = REVLOGV2
425 elif b'revlogv1' in opts:
404 elif b'revlogv1' in opts:
426 new_header = REVLOGV1 | FLAG_INLINE_DATA
405 new_header = REVLOGV1 | FLAG_INLINE_DATA
427 if b'generaldelta' in opts:
406 if b'generaldelta' in opts:
428 new_header |= FLAG_GENERALDELTA
407 new_header |= FLAG_GENERALDELTA
429 elif b'revlogv0' in self.opener.options:
408 elif b'revlogv0' in self.opener.options:
430 new_header = REVLOGV0
409 new_header = REVLOGV0
431 else:
410 else:
432 new_header = REVLOG_DEFAULT_VERSION
411 new_header = REVLOG_DEFAULT_VERSION
433
412
434 if b'chunkcachesize' in opts:
413 if b'chunkcachesize' in opts:
435 self._chunkcachesize = opts[b'chunkcachesize']
414 self._chunkcachesize = opts[b'chunkcachesize']
436 if b'maxchainlen' in opts:
415 if b'maxchainlen' in opts:
437 self._maxchainlen = opts[b'maxchainlen']
416 self._maxchainlen = opts[b'maxchainlen']
438 if b'deltabothparents' in opts:
417 if b'deltabothparents' in opts:
439 self._deltabothparents = opts[b'deltabothparents']
418 self._deltabothparents = opts[b'deltabothparents']
440 self._lazydelta = bool(opts.get(b'lazydelta', True))
419 self._lazydelta = bool(opts.get(b'lazydelta', True))
441 self._lazydeltabase = False
420 self._lazydeltabase = False
442 if self._lazydelta:
421 if self._lazydelta:
443 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
422 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
444 if b'compengine' in opts:
423 if b'compengine' in opts:
445 self._compengine = opts[b'compengine']
424 self._compengine = opts[b'compengine']
446 if b'zlib.level' in opts:
425 if b'zlib.level' in opts:
447 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
426 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
448 if b'zstd.level' in opts:
427 if b'zstd.level' in opts:
449 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
428 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
450 if b'maxdeltachainspan' in opts:
429 if b'maxdeltachainspan' in opts:
451 self._maxdeltachainspan = opts[b'maxdeltachainspan']
430 self._maxdeltachainspan = opts[b'maxdeltachainspan']
452 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
431 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
453 mmapindexthreshold = opts[b'mmapindexthreshold']
432 mmapindexthreshold = opts[b'mmapindexthreshold']
454 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
433 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
455 withsparseread = bool(opts.get(b'with-sparse-read', False))
434 withsparseread = bool(opts.get(b'with-sparse-read', False))
456 # sparse-revlog forces sparse-read
435 # sparse-revlog forces sparse-read
457 self._withsparseread = self._sparserevlog or withsparseread
436 self._withsparseread = self._sparserevlog or withsparseread
458 if b'sparse-read-density-threshold' in opts:
437 if b'sparse-read-density-threshold' in opts:
459 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
438 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
460 if b'sparse-read-min-gap-size' in opts:
439 if b'sparse-read-min-gap-size' in opts:
461 self._srmingapsize = opts[b'sparse-read-min-gap-size']
440 self._srmingapsize = opts[b'sparse-read-min-gap-size']
462 if opts.get(b'enableellipsis'):
441 if opts.get(b'enableellipsis'):
463 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
442 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
464
443
465 # revlog v0 doesn't have flag processors
444 # revlog v0 doesn't have flag processors
466 for flag, processor in pycompat.iteritems(
445 for flag, processor in pycompat.iteritems(
467 opts.get(b'flagprocessors', {})
446 opts.get(b'flagprocessors', {})
468 ):
447 ):
469 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
448 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
470
449
471 if self._chunkcachesize <= 0:
450 if self._chunkcachesize <= 0:
472 raise error.RevlogError(
451 raise error.RevlogError(
473 _(b'revlog chunk cache size %r is not greater than 0')
452 _(b'revlog chunk cache size %r is not greater than 0')
474 % self._chunkcachesize
453 % self._chunkcachesize
475 )
454 )
476 elif self._chunkcachesize & (self._chunkcachesize - 1):
455 elif self._chunkcachesize & (self._chunkcachesize - 1):
477 raise error.RevlogError(
456 raise error.RevlogError(
478 _(b'revlog chunk cache size %r is not a power of 2')
457 _(b'revlog chunk cache size %r is not a power of 2')
479 % self._chunkcachesize
458 % self._chunkcachesize
480 )
459 )
481 force_nodemap = opts.get(b'devel-force-nodemap', False)
460 force_nodemap = opts.get(b'devel-force-nodemap', False)
482 return new_header, mmapindexthreshold, force_nodemap
461 return new_header, mmapindexthreshold, force_nodemap
483
462
484 def _get_data(self, filepath, mmap_threshold, size=None):
463 def _get_data(self, filepath, mmap_threshold, size=None):
485 """return a file content with or without mmap
464 """return a file content with or without mmap
486
465
487 If the file is missing return the empty string"""
466 If the file is missing return the empty string"""
488 try:
467 try:
489 with self.opener(filepath) as fp:
468 with self.opener(filepath) as fp:
490 if mmap_threshold is not None:
469 if mmap_threshold is not None:
491 file_size = self.opener.fstat(fp).st_size
470 file_size = self.opener.fstat(fp).st_size
492 if file_size >= mmap_threshold:
471 if file_size >= mmap_threshold:
493 if size is not None:
472 if size is not None:
494 # avoid potentiel mmap crash
473 # avoid potentiel mmap crash
495 size = min(file_size, size)
474 size = min(file_size, size)
496 # TODO: should .close() to release resources without
475 # TODO: should .close() to release resources without
497 # relying on Python GC
476 # relying on Python GC
498 if size is None:
477 if size is None:
499 return util.buffer(util.mmapread(fp))
478 return util.buffer(util.mmapread(fp))
500 else:
479 else:
501 return util.buffer(util.mmapread(fp, size))
480 return util.buffer(util.mmapread(fp, size))
502 if size is None:
481 if size is None:
503 return fp.read()
482 return fp.read()
504 else:
483 else:
505 return fp.read(size)
484 return fp.read(size)
506 except IOError as inst:
485 except IOError as inst:
507 if inst.errno != errno.ENOENT:
486 if inst.errno != errno.ENOENT:
508 raise
487 raise
509 return b''
488 return b''
510
489
511 def _loadindex(self):
490 def _loadindex(self):
512
491
513 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
492 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
514
493
515 if self.postfix is not None:
494 if self.postfix is not None:
516 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
495 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
517 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
496 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
518 entry_point = b'%s.i.a' % self.radix
497 entry_point = b'%s.i.a' % self.radix
519 else:
498 else:
520 entry_point = b'%s.i' % self.radix
499 entry_point = b'%s.i' % self.radix
521
500
522 entry_data = b''
501 entry_data = b''
523 self._initempty = True
502 self._initempty = True
524 entry_data = self._get_data(entry_point, mmapindexthreshold)
503 entry_data = self._get_data(entry_point, mmapindexthreshold)
525 if len(entry_data) > 0:
504 if len(entry_data) > 0:
526 header = INDEX_HEADER.unpack(entry_data[:4])[0]
505 header = INDEX_HEADER.unpack(entry_data[:4])[0]
527 self._initempty = False
506 self._initempty = False
528 else:
507 else:
529 header = new_header
508 header = new_header
530
509
531 self._format_flags = header & ~0xFFFF
510 self._format_flags = header & ~0xFFFF
532 self._format_version = header & 0xFFFF
511 self._format_version = header & 0xFFFF
533
512
534 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
513 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
535 if supported_flags is None:
514 if supported_flags is None:
536 msg = _(b'unknown version (%d) in revlog %s')
515 msg = _(b'unknown version (%d) in revlog %s')
537 msg %= (self._format_version, self.display_id)
516 msg %= (self._format_version, self.display_id)
538 raise error.RevlogError(msg)
517 raise error.RevlogError(msg)
539 elif self._format_flags & ~supported_flags:
518 elif self._format_flags & ~supported_flags:
540 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
519 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
541 display_flag = self._format_flags >> 16
520 display_flag = self._format_flags >> 16
542 msg %= (display_flag, self._format_version, self.display_id)
521 msg %= (display_flag, self._format_version, self.display_id)
543 raise error.RevlogError(msg)
522 raise error.RevlogError(msg)
544
523
545 features = FEATURES_BY_VERSION[self._format_version]
524 features = FEATURES_BY_VERSION[self._format_version]
546 self._inline = features[b'inline'](self._format_flags)
525 self._inline = features[b'inline'](self._format_flags)
547 self._generaldelta = features[b'generaldelta'](self._format_flags)
526 self._generaldelta = features[b'generaldelta'](self._format_flags)
548 self.hassidedata = features[b'sidedata']
527 self.hassidedata = features[b'sidedata']
549
528
550 if not features[b'docket']:
529 if not features[b'docket']:
551 self._indexfile = entry_point
530 self._indexfile = entry_point
552 index_data = entry_data
531 index_data = entry_data
553 else:
532 else:
554 self._docket_file = entry_point
533 self._docket_file = entry_point
555 if self._initempty:
534 if self._initempty:
556 self._docket = docketutil.default_docket(self, header)
535 self._docket = docketutil.default_docket(self, header)
557 else:
536 else:
558 self._docket = docketutil.parse_docket(
537 self._docket = docketutil.parse_docket(
559 self, entry_data, use_pending=self._trypending
538 self, entry_data, use_pending=self._trypending
560 )
539 )
561 self._indexfile = self._docket.index_filepath()
540 self._indexfile = self._docket.index_filepath()
562 index_data = b''
541 index_data = b''
563 index_size = self._docket.index_end
542 index_size = self._docket.index_end
564 if index_size > 0:
543 if index_size > 0:
565 index_data = self._get_data(
544 index_data = self._get_data(
566 self._indexfile, mmapindexthreshold, size=index_size
545 self._indexfile, mmapindexthreshold, size=index_size
567 )
546 )
568 if len(index_data) < index_size:
547 if len(index_data) < index_size:
569 msg = _(b'too few index data for %s: got %d, expected %d')
548 msg = _(b'too few index data for %s: got %d, expected %d')
570 msg %= (self.display_id, len(index_data), index_size)
549 msg %= (self.display_id, len(index_data), index_size)
571 raise error.RevlogError(msg)
550 raise error.RevlogError(msg)
572
551
573 self._inline = False
552 self._inline = False
574 # generaldelta implied by version 2 revlogs.
553 # generaldelta implied by version 2 revlogs.
575 self._generaldelta = True
554 self._generaldelta = True
576 # the logic for persistent nodemap will be dealt with within the
555 # the logic for persistent nodemap will be dealt with within the
577 # main docket, so disable it for now.
556 # main docket, so disable it for now.
578 self._nodemap_file = None
557 self._nodemap_file = None
579
558
580 if self._docket is not None:
559 if self._docket is not None:
581 self._datafile = self._docket.data_filepath()
560 self._datafile = self._docket.data_filepath()
582 self._sidedatafile = self._docket.sidedata_filepath()
561 self._sidedatafile = self._docket.sidedata_filepath()
583 elif self.postfix is None:
562 elif self.postfix is None:
584 self._datafile = b'%s.d' % self.radix
563 self._datafile = b'%s.d' % self.radix
585 else:
564 else:
586 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
565 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
587
566
588 self.nodeconstants = sha1nodeconstants
567 self.nodeconstants = sha1nodeconstants
589 self.nullid = self.nodeconstants.nullid
568 self.nullid = self.nodeconstants.nullid
590
569
591 # sparse-revlog can't be on without general-delta (issue6056)
570 # sparse-revlog can't be on without general-delta (issue6056)
592 if not self._generaldelta:
571 if not self._generaldelta:
593 self._sparserevlog = False
572 self._sparserevlog = False
594
573
595 self._storedeltachains = True
574 self._storedeltachains = True
596
575
597 devel_nodemap = (
576 devel_nodemap = (
598 self._nodemap_file
577 self._nodemap_file
599 and force_nodemap
578 and force_nodemap
600 and parse_index_v1_nodemap is not None
579 and parse_index_v1_nodemap is not None
601 )
580 )
602
581
603 use_rust_index = False
582 use_rust_index = False
604 if rustrevlog is not None:
583 if rustrevlog is not None:
605 if self._nodemap_file is not None:
584 if self._nodemap_file is not None:
606 use_rust_index = True
585 use_rust_index = True
607 else:
586 else:
608 use_rust_index = self.opener.options.get(b'rust.index')
587 use_rust_index = self.opener.options.get(b'rust.index')
609
588
610 self._parse_index = parse_index_v1
589 self._parse_index = parse_index_v1
611 if self._format_version == REVLOGV0:
590 if self._format_version == REVLOGV0:
612 self._parse_index = revlogv0.parse_index_v0
591 self._parse_index = revlogv0.parse_index_v0
613 elif self._format_version == REVLOGV2:
592 elif self._format_version == REVLOGV2:
614 self._parse_index = parse_index_v2
593 self._parse_index = parse_index_v2
615 elif self._format_version == CHANGELOGV2:
594 elif self._format_version == CHANGELOGV2:
616 self._parse_index = parse_index_cl_v2
595 self._parse_index = parse_index_cl_v2
617 elif devel_nodemap:
596 elif devel_nodemap:
618 self._parse_index = parse_index_v1_nodemap
597 self._parse_index = parse_index_v1_nodemap
619 elif use_rust_index:
598 elif use_rust_index:
620 self._parse_index = parse_index_v1_mixed
599 self._parse_index = parse_index_v1_mixed
621 try:
600 try:
622 d = self._parse_index(index_data, self._inline)
601 d = self._parse_index(index_data, self._inline)
623 index, _chunkcache = d
602 index, _chunkcache = d
624 use_nodemap = (
603 use_nodemap = (
625 not self._inline
604 not self._inline
626 and self._nodemap_file is not None
605 and self._nodemap_file is not None
627 and util.safehasattr(index, 'update_nodemap_data')
606 and util.safehasattr(index, 'update_nodemap_data')
628 )
607 )
629 if use_nodemap:
608 if use_nodemap:
630 nodemap_data = nodemaputil.persisted_data(self)
609 nodemap_data = nodemaputil.persisted_data(self)
631 if nodemap_data is not None:
610 if nodemap_data is not None:
632 docket = nodemap_data[0]
611 docket = nodemap_data[0]
633 if (
612 if (
634 len(d[0]) > docket.tip_rev
613 len(d[0]) > docket.tip_rev
635 and d[0][docket.tip_rev][7] == docket.tip_node
614 and d[0][docket.tip_rev][7] == docket.tip_node
636 ):
615 ):
637 # no changelog tampering
616 # no changelog tampering
638 self._nodemap_docket = docket
617 self._nodemap_docket = docket
639 index.update_nodemap_data(*nodemap_data)
618 index.update_nodemap_data(*nodemap_data)
640 except (ValueError, IndexError):
619 except (ValueError, IndexError):
641 raise error.RevlogError(
620 raise error.RevlogError(
642 _(b"index %s is corrupted") % self.display_id
621 _(b"index %s is corrupted") % self.display_id
643 )
622 )
644 self.index, self._chunkcache = d
623 self.index, self._chunkcache = d
645 if not self._chunkcache:
624 if not self._chunkcache:
646 self._chunkclear()
625 self._chunkclear()
647 # revnum -> (chain-length, sum-delta-length)
626 # revnum -> (chain-length, sum-delta-length)
648 self._chaininfocache = util.lrucachedict(500)
627 self._chaininfocache = util.lrucachedict(500)
649 # revlog header -> revlog compressor
628 # revlog header -> revlog compressor
650 self._decompressors = {}
629 self._decompressors = {}
651
630
652 @util.propertycache
631 @util.propertycache
653 def revlog_kind(self):
632 def revlog_kind(self):
654 return self.target[0]
633 return self.target[0]
655
634
656 @util.propertycache
635 @util.propertycache
657 def display_id(self):
636 def display_id(self):
658 """The public facing "ID" of the revlog that we use in message"""
637 """The public facing "ID" of the revlog that we use in message"""
659 # Maybe we should build a user facing representation of
638 # Maybe we should build a user facing representation of
660 # revlog.target instead of using `self.radix`
639 # revlog.target instead of using `self.radix`
661 return self.radix
640 return self.radix
662
641
663 def _get_decompressor(self, t):
642 def _get_decompressor(self, t):
664 try:
643 try:
665 compressor = self._decompressors[t]
644 compressor = self._decompressors[t]
666 except KeyError:
645 except KeyError:
667 try:
646 try:
668 engine = util.compengines.forrevlogheader(t)
647 engine = util.compengines.forrevlogheader(t)
669 compressor = engine.revlogcompressor(self._compengineopts)
648 compressor = engine.revlogcompressor(self._compengineopts)
670 self._decompressors[t] = compressor
649 self._decompressors[t] = compressor
671 except KeyError:
650 except KeyError:
672 raise error.RevlogError(
651 raise error.RevlogError(
673 _(b'unknown compression type %s') % binascii.hexlify(t)
652 _(b'unknown compression type %s') % binascii.hexlify(t)
674 )
653 )
675 return compressor
654 return compressor
676
655
677 @util.propertycache
656 @util.propertycache
678 def _compressor(self):
657 def _compressor(self):
679 engine = util.compengines[self._compengine]
658 engine = util.compengines[self._compengine]
680 return engine.revlogcompressor(self._compengineopts)
659 return engine.revlogcompressor(self._compengineopts)
681
660
682 @util.propertycache
661 @util.propertycache
683 def _decompressor(self):
662 def _decompressor(self):
684 """the default decompressor"""
663 """the default decompressor"""
685 if self._docket is None:
664 if self._docket is None:
686 return None
665 return None
687 t = self._docket.default_compression_header
666 t = self._docket.default_compression_header
688 c = self._get_decompressor(t)
667 c = self._get_decompressor(t)
689 return c.decompress
668 return c.decompress
690
669
691 def _indexfp(self):
670 def _indexfp(self):
692 """file object for the revlog's index file"""
671 """file object for the revlog's index file"""
693 return self.opener(self._indexfile, mode=b"r")
672 return self.opener(self._indexfile, mode=b"r")
694
673
695 def __index_write_fp(self):
674 def __index_write_fp(self):
696 # You should not use this directly and use `_writing` instead
675 # You should not use this directly and use `_writing` instead
697 try:
676 try:
698 f = self.opener(
677 f = self.opener(
699 self._indexfile, mode=b"r+", checkambig=self._checkambig
678 self._indexfile, mode=b"r+", checkambig=self._checkambig
700 )
679 )
701 if self._docket is None:
680 if self._docket is None:
702 f.seek(0, os.SEEK_END)
681 f.seek(0, os.SEEK_END)
703 else:
682 else:
704 f.seek(self._docket.index_end, os.SEEK_SET)
683 f.seek(self._docket.index_end, os.SEEK_SET)
705 return f
684 return f
706 except IOError as inst:
685 except IOError as inst:
707 if inst.errno != errno.ENOENT:
686 if inst.errno != errno.ENOENT:
708 raise
687 raise
709 return self.opener(
688 return self.opener(
710 self._indexfile, mode=b"w+", checkambig=self._checkambig
689 self._indexfile, mode=b"w+", checkambig=self._checkambig
711 )
690 )
712
691
713 def __index_new_fp(self):
692 def __index_new_fp(self):
714 # You should not use this unless you are upgrading from inline revlog
693 # You should not use this unless you are upgrading from inline revlog
715 return self.opener(
694 return self.opener(
716 self._indexfile,
695 self._indexfile,
717 mode=b"w",
696 mode=b"w",
718 checkambig=self._checkambig,
697 checkambig=self._checkambig,
719 atomictemp=True,
698 atomictemp=True,
720 )
699 )
721
700
722 def _datafp(self, mode=b'r'):
701 def _datafp(self, mode=b'r'):
723 """file object for the revlog's data file"""
702 """file object for the revlog's data file"""
724 return self.opener(self._datafile, mode=mode)
703 return self.opener(self._datafile, mode=mode)
725
704
726 @contextlib.contextmanager
705 @contextlib.contextmanager
727 def _datareadfp(self, existingfp=None):
706 def _datareadfp(self, existingfp=None):
728 """file object suitable to read data"""
707 """file object suitable to read data"""
729 # Use explicit file handle, if given.
708 # Use explicit file handle, if given.
730 if existingfp is not None:
709 if existingfp is not None:
731 yield existingfp
710 yield existingfp
732
711
733 # Use a file handle being actively used for writes, if available.
712 # Use a file handle being actively used for writes, if available.
734 # There is some danger to doing this because reads will seek the
713 # There is some danger to doing this because reads will seek the
735 # file. However, _writeentry() performs a SEEK_END before all writes,
714 # file. However, _writeentry() performs a SEEK_END before all writes,
736 # so we should be safe.
715 # so we should be safe.
737 elif self._writinghandles:
716 elif self._writinghandles:
738 if self._inline:
717 if self._inline:
739 yield self._writinghandles[0]
718 yield self._writinghandles[0]
740 else:
719 else:
741 yield self._writinghandles[1]
720 yield self._writinghandles[1]
742
721
743 # Otherwise open a new file handle.
722 # Otherwise open a new file handle.
744 else:
723 else:
745 if self._inline:
724 if self._inline:
746 func = self._indexfp
725 func = self._indexfp
747 else:
726 else:
748 func = self._datafp
727 func = self._datafp
749 with func() as fp:
728 with func() as fp:
750 yield fp
729 yield fp
751
730
752 @contextlib.contextmanager
731 @contextlib.contextmanager
753 def _sidedatareadfp(self):
732 def _sidedatareadfp(self):
754 """file object suitable to read sidedata"""
733 """file object suitable to read sidedata"""
755 if self._writinghandles:
734 if self._writinghandles:
756 yield self._writinghandles[2]
735 yield self._writinghandles[2]
757 else:
736 else:
758 with self.opener(self._sidedatafile) as fp:
737 with self.opener(self._sidedatafile) as fp:
759 yield fp
738 yield fp
760
739
761 def tiprev(self):
740 def tiprev(self):
762 return len(self.index) - 1
741 return len(self.index) - 1
763
742
764 def tip(self):
743 def tip(self):
765 return self.node(self.tiprev())
744 return self.node(self.tiprev())
766
745
767 def __contains__(self, rev):
746 def __contains__(self, rev):
768 return 0 <= rev < len(self)
747 return 0 <= rev < len(self)
769
748
770 def __len__(self):
749 def __len__(self):
771 return len(self.index)
750 return len(self.index)
772
751
773 def __iter__(self):
752 def __iter__(self):
774 return iter(pycompat.xrange(len(self)))
753 return iter(pycompat.xrange(len(self)))
775
754
776 def revs(self, start=0, stop=None):
755 def revs(self, start=0, stop=None):
777 """iterate over all rev in this revlog (from start to stop)"""
756 """iterate over all rev in this revlog (from start to stop)"""
778 return storageutil.iterrevs(len(self), start=start, stop=stop)
757 return storageutil.iterrevs(len(self), start=start, stop=stop)
779
758
780 @property
759 @property
781 def nodemap(self):
760 def nodemap(self):
782 msg = (
761 msg = (
783 b"revlog.nodemap is deprecated, "
762 b"revlog.nodemap is deprecated, "
784 b"use revlog.index.[has_node|rev|get_rev]"
763 b"use revlog.index.[has_node|rev|get_rev]"
785 )
764 )
786 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
765 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
787 return self.index.nodemap
766 return self.index.nodemap
788
767
789 @property
768 @property
790 def _nodecache(self):
769 def _nodecache(self):
791 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
770 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
792 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
771 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
793 return self.index.nodemap
772 return self.index.nodemap
794
773
795 def hasnode(self, node):
774 def hasnode(self, node):
796 try:
775 try:
797 self.rev(node)
776 self.rev(node)
798 return True
777 return True
799 except KeyError:
778 except KeyError:
800 return False
779 return False
801
780
802 def candelta(self, baserev, rev):
781 def candelta(self, baserev, rev):
803 """whether two revisions (baserev, rev) can be delta-ed or not"""
782 """whether two revisions (baserev, rev) can be delta-ed or not"""
804 # Disable delta if either rev requires a content-changing flag
783 # Disable delta if either rev requires a content-changing flag
805 # processor (ex. LFS). This is because such flag processor can alter
784 # processor (ex. LFS). This is because such flag processor can alter
806 # the rawtext content that the delta will be based on, and two clients
785 # the rawtext content that the delta will be based on, and two clients
807 # could have a same revlog node with different flags (i.e. different
786 # could have a same revlog node with different flags (i.e. different
808 # rawtext contents) and the delta could be incompatible.
787 # rawtext contents) and the delta could be incompatible.
809 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
788 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
810 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
789 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
811 ):
790 ):
812 return False
791 return False
813 return True
792 return True
814
793
815 def update_caches(self, transaction):
794 def update_caches(self, transaction):
816 if self._nodemap_file is not None:
795 if self._nodemap_file is not None:
817 if transaction is None:
796 if transaction is None:
818 nodemaputil.update_persistent_nodemap(self)
797 nodemaputil.update_persistent_nodemap(self)
819 else:
798 else:
820 nodemaputil.setup_persistent_nodemap(transaction, self)
799 nodemaputil.setup_persistent_nodemap(transaction, self)
821
800
822 def clearcaches(self):
801 def clearcaches(self):
823 self._revisioncache = None
802 self._revisioncache = None
824 self._chainbasecache.clear()
803 self._chainbasecache.clear()
825 self._chunkcache = (0, b'')
804 self._chunkcache = (0, b'')
826 self._pcache = {}
805 self._pcache = {}
827 self._nodemap_docket = None
806 self._nodemap_docket = None
828 self.index.clearcaches()
807 self.index.clearcaches()
829 # The python code is the one responsible for validating the docket, we
808 # The python code is the one responsible for validating the docket, we
830 # end up having to refresh it here.
809 # end up having to refresh it here.
831 use_nodemap = (
810 use_nodemap = (
832 not self._inline
811 not self._inline
833 and self._nodemap_file is not None
812 and self._nodemap_file is not None
834 and util.safehasattr(self.index, 'update_nodemap_data')
813 and util.safehasattr(self.index, 'update_nodemap_data')
835 )
814 )
836 if use_nodemap:
815 if use_nodemap:
837 nodemap_data = nodemaputil.persisted_data(self)
816 nodemap_data = nodemaputil.persisted_data(self)
838 if nodemap_data is not None:
817 if nodemap_data is not None:
839 self._nodemap_docket = nodemap_data[0]
818 self._nodemap_docket = nodemap_data[0]
840 self.index.update_nodemap_data(*nodemap_data)
819 self.index.update_nodemap_data(*nodemap_data)
841
820
842 def rev(self, node):
821 def rev(self, node):
843 try:
822 try:
844 return self.index.rev(node)
823 return self.index.rev(node)
845 except TypeError:
824 except TypeError:
846 raise
825 raise
847 except error.RevlogError:
826 except error.RevlogError:
848 # parsers.c radix tree lookup failed
827 # parsers.c radix tree lookup failed
849 if (
828 if (
850 node == self.nodeconstants.wdirid
829 node == self.nodeconstants.wdirid
851 or node in self.nodeconstants.wdirfilenodeids
830 or node in self.nodeconstants.wdirfilenodeids
852 ):
831 ):
853 raise error.WdirUnsupported
832 raise error.WdirUnsupported
854 raise error.LookupError(node, self.display_id, _(b'no node'))
833 raise error.LookupError(node, self.display_id, _(b'no node'))
855
834
856 # Accessors for index entries.
835 # Accessors for index entries.
857
836
858 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
837 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
859 # are flags.
838 # are flags.
860 def start(self, rev):
839 def start(self, rev):
861 return int(self.index[rev][0] >> 16)
840 return int(self.index[rev][0] >> 16)
862
841
863 def sidedata_cut_off(self, rev):
842 def sidedata_cut_off(self, rev):
864 sd_cut_off = self.index[rev][8]
843 sd_cut_off = self.index[rev][8]
865 if sd_cut_off != 0:
844 if sd_cut_off != 0:
866 return sd_cut_off
845 return sd_cut_off
867 # This is some annoying dance, because entries without sidedata
846 # This is some annoying dance, because entries without sidedata
868 # currently use 0 as their ofsset. (instead of previous-offset +
847 # currently use 0 as their ofsset. (instead of previous-offset +
869 # previous-size)
848 # previous-size)
870 #
849 #
871 # We should reconsider this sidedata β†’ 0 sidata_offset policy.
850 # We should reconsider this sidedata β†’ 0 sidata_offset policy.
872 # In the meantime, we need this.
851 # In the meantime, we need this.
873 while 0 <= rev:
852 while 0 <= rev:
874 e = self.index[rev]
853 e = self.index[rev]
875 if e[9] != 0:
854 if e[9] != 0:
876 return e[8] + e[9]
855 return e[8] + e[9]
877 rev -= 1
856 rev -= 1
878 return 0
857 return 0
879
858
880 def flags(self, rev):
859 def flags(self, rev):
881 return self.index[rev][0] & 0xFFFF
860 return self.index[rev][0] & 0xFFFF
882
861
883 def length(self, rev):
862 def length(self, rev):
884 return self.index[rev][1]
863 return self.index[rev][1]
885
864
886 def sidedata_length(self, rev):
865 def sidedata_length(self, rev):
887 if not self.hassidedata:
866 if not self.hassidedata:
888 return 0
867 return 0
889 return self.index[rev][9]
868 return self.index[rev][9]
890
869
891 def rawsize(self, rev):
870 def rawsize(self, rev):
892 """return the length of the uncompressed text for a given revision"""
871 """return the length of the uncompressed text for a given revision"""
893 l = self.index[rev][2]
872 l = self.index[rev][2]
894 if l >= 0:
873 if l >= 0:
895 return l
874 return l
896
875
897 t = self.rawdata(rev)
876 t = self.rawdata(rev)
898 return len(t)
877 return len(t)
899
878
900 def size(self, rev):
879 def size(self, rev):
901 """length of non-raw text (processed by a "read" flag processor)"""
880 """length of non-raw text (processed by a "read" flag processor)"""
902 # fast path: if no "read" flag processor could change the content,
881 # fast path: if no "read" flag processor could change the content,
903 # size is rawsize. note: ELLIPSIS is known to not change the content.
882 # size is rawsize. note: ELLIPSIS is known to not change the content.
904 flags = self.flags(rev)
883 flags = self.flags(rev)
905 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
884 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
906 return self.rawsize(rev)
885 return self.rawsize(rev)
907
886
908 return len(self.revision(rev, raw=False))
887 return len(self.revision(rev, raw=False))
909
888
910 def chainbase(self, rev):
889 def chainbase(self, rev):
911 base = self._chainbasecache.get(rev)
890 base = self._chainbasecache.get(rev)
912 if base is not None:
891 if base is not None:
913 return base
892 return base
914
893
915 index = self.index
894 index = self.index
916 iterrev = rev
895 iterrev = rev
917 base = index[iterrev][3]
896 base = index[iterrev][3]
918 while base != iterrev:
897 while base != iterrev:
919 iterrev = base
898 iterrev = base
920 base = index[iterrev][3]
899 base = index[iterrev][3]
921
900
922 self._chainbasecache[rev] = base
901 self._chainbasecache[rev] = base
923 return base
902 return base
924
903
    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise
        if entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if d[5] == self.nullid:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

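    # Editor's note: a hedged walkthrough of the pure-Python fallback in
    # ``_deltachain()`` using the same toy index as above (field 3 = delta
    # base). For rev 2 with generaldelta and no ``stoprev``, the walk goes
    # 2 -> base 1 -> base 0, which is its own base, so the result after the
    # reverse is chain == [0, 1, 2] with stopped == False. With stoprev=1
    # the loop halts before appending rev 1, giving chain == [2] and
    # stopped == True.
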
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

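    # Editor's note: a hedged illustration (toy data, not module code) of
    # the BFS above. In a linear DAG 0 <- 1 <- 2 <- 3 with common=[1] and
    # heads=[3], the inclusive ancestor set ``has`` is {0, 1, nullrev}, so
    # the walk visits 3 then 2, stops at 1, and the missing list comes back
    # topologically sorted as [2, 3].
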
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

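    # Editor's note: a small hedged example of the contract above. In a DAG
    # 0 <- 1 <- 2 with roots=[node(1)] and heads=[node(2)], 'nodes' comes
    # back as [node(1), node(2)] in topological order, outroots == [node(1)]
    # and outheads == [node(2)]; rev 0 is excluded because it is not a
    # descendant of any root.
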
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

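    # Editor's note: a hedged sketch of the marking trick in ``_headrevs()``
    # with toy parent pairs (hypothetical data, not module state):
    #
    #     parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1)}
    #     ishead = [0] * (len(parents) + 1)  # extra slot absorbs nullrev (-1)
    #     for r in range(len(parents)):
    #         ishead[r] = 1  # provisionally a head
    #         p1, p2 = parents[r]
    #         ishead[p1] = ishead[p2] = 0  # anything with a child is cleared
    #     assert [r for r, v in enumerate(ishead) if v] == [1, 2]
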
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

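    # Editor's note: the ancestry test above short-circuits on revision
    # numbers before touching the graph: nullrev is everyone's ancestor, a
    # revision is its own ancestor, and since revision numbers are assigned
    # in topological order an ancestor can never have a higher number than
    # its descendant, so ``a > b`` settles the question without a walk. Only
    # the remaining case asks reachableroots whether a lies on a path to b.
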
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

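    # Editor's note: a hedged summary of the resolution order in ``_match()``:
    # an int is taken as a revision number directly; a string of nodelen
    # bytes is first probed as a binary node; next the bytes are parsed as a
    # decimal revision (negative values counting back from len(self)); and
    # only a string of 2 * nodelen characters is tried as a full hex nodeid.
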
    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

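    # Editor's note: a hedged sketch of the wdir disambiguation above. A
    # prefix made entirely of 'f' characters could also name the virtual
    # working-directory id (ffff...), so ``disambiguate()`` keeps extending
    # the prefix until a non-'f' digit appears:
    #
    #     hexnode = "ffa9"  # hypothetical hash
    #     for length in range(1, len(hexnode) + 1):
    #         if not all(c == "f" for c in hexnode[:length]):
    #             break
    #     assert hexnode[:length] == "ffa"  # first prefix not all 'f'
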
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

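    # Editor's note: a hedged sketch of the revision hashing used by
    # ``cmp()`` above: the SHA-1 is taken over the two parent nodes in
    # sorted order followed by the text, so identity depends on history as
    # well as content. Roughly (toy_hashrevision is illustrative, not the
    # real storageutil helper):
    #
    #     import hashlib
    #     def toy_hashrevision(text, p1, p2):
    #         s = hashlib.sha1(min(p1, p2))
    #         s.update(max(p1, p2))
    #         s.update(text)
    #         return s.digest()
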
    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                filename = self._indexfile if self._inline else self._datafile
                got = len(d) - startoffset
                m = PARTIAL_READ_MSG % (filename, length, offset, got)
                raise error.RevlogError(m)
            return util.buffer(d, startoffset, length)

        if len(d) < length:
            filename = self._indexfile if self._inline else self._datafile
            # in this branch offset == realoffset, so the whole read counts
            got = len(d)
            m = PARTIAL_READ_MSG % (filename, length, offset, got)
            raise error.RevlogError(m)

        return d

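    # Editor's note: a hedged illustration of the window alignment above,
    # which rounds the requested range out to cache-size boundaries
    # (cachesize is assumed to be a power of two for the mask to work):
    #
    #     cachesize = 64
    #     offset, length = 100, 20
    #     realoffset = offset & ~(cachesize - 1)
    #     reallength = (
    #         (offset + length + cachesize) & ~(cachesize - 1)
    #     ) - realoffset
    #     # window [64, 128) covers the requested [100, 120)
    #     assert (realoffset, reallength) == (64, 64)
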
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._getsegment(start, length, df=df)

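    # Editor's note: two hedged observations about the arithmetic above.
    # Entry field 0 packs the data offset in its upper bits above 16 flag
    # bits, hence the ``>> 16``. And for inline revlogs the index entries
    # and revision data are interleaved in one file, so a revision's data
    # sits after (rev + 1) index entries; e.g. with a hypothetical 64-byte
    # entry size, rev 0's chunk starts at byte 64 plus its logical offset.
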
1760 def _chunk(self, rev, df=None):
1739 def _chunk(self, rev, df=None):
1761 """Obtain a single decompressed chunk for a revision.
1740 """Obtain a single decompressed chunk for a revision.
1762
1741
1763 Accepts an integer revision and an optional already-open file handle
1742 Accepts an integer revision and an optional already-open file handle
1764 to be used for reading. If used, the seek position of the file will not
1743 to be used for reading. If used, the seek position of the file will not
1765 be preserved.
1744 be preserved.
1766
1745
1767 Returns a str holding uncompressed data for the requested revision.
1746 Returns a str holding uncompressed data for the requested revision.
1768 """
1747 """
1769 compression_mode = self.index[rev][10]
1748 compression_mode = self.index[rev][10]
1770 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1749 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1771 if compression_mode == COMP_MODE_PLAIN:
1750 if compression_mode == COMP_MODE_PLAIN:
1772 return data
1751 return data
1773 elif compression_mode == COMP_MODE_DEFAULT:
1752 elif compression_mode == COMP_MODE_DEFAULT:
1774 return self._decompressor(data)
1753 return self._decompressor(data)
1775 elif compression_mode == COMP_MODE_INLINE:
1754 elif compression_mode == COMP_MODE_INLINE:
1776 return self.decompress(data)
1755 return self.decompress(data)
1777 else:
1756 else:
1778 msg = 'unknown compression mode %d'
1757 msg = 'unknown compression mode %d'
1779 msg %= compression_mode
1758 msg %= compression_mode
1780 raise error.RevlogError(msg)
1759 raise error.RevlogError(msg)
1781
1760
    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = 'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

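    # A minimal sketch of how these chunks are consumed, mirroring
    # ``_rawtext`` below: the first chunk of a delta chain is the base
    # text and the remaining chunks are deltas applied on top of it.
    #
    #     chain, stopped = self._deltachain(rev)
    #     bins = self._chunks(chain)
    #     rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
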
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

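    # Informal summary of the two layouts handled above, with ``base``
    # being ``self.index[rev][3]``:
    #
    #     base == rev   -> full text, the delta parent is nullrev
    #     generaldelta  -> the delta applies on top of ``base``
    #     legacy layout -> the delta always applies on top of ``rev - 1``
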
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

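    # Informal recap of the sparse-revlog rules implemented above:
    #
    #     base == rev or base == nullrev -> full snapshot
    #     base is p1 or p2               -> plain delta, not a snapshot
    #     otherwise                      -> intermediate snapshot if (and
    #                                       only if) its base is itself a
    #                                       snapshot, checked recursively
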
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

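    # A short note on the fast path above: when ``rev2`` is stored as a
    # delta against ``rev1``, the on-disk chunk already *is* the requested
    # binary delta, so it can be returned without running a diff again;
    # otherwise the delta is recomputed from both raw texts:
    #
    #     mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
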
    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflags(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more
        advanced mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        # XXX this needs caching, as we do for data
        with self._sidedatareadfp() as sdf:
            if self._docket.sidedata_end < sidedata_offset + sidedata_size:
                filename = self._sidedatafile
                end = self._docket.sidedata_end
                offset = sidedata_offset
                length = sidedata_size
                m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
                raise error.RevlogError(m)

            sdf.seek(sidedata_offset, os.SEEK_SET)
            comp_segment = sdf.read(sidedata_size)

            if len(comp_segment) < sidedata_size:
                filename = self._sidedatafile
                length = sidedata_size
                offset = sidedata_offset
                got = len(comp_segment)
                m = PARTIAL_READ_MSG % (filename, length, offset, got)
                raise error.RevlogError(m)

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = 'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

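    # For reference, a sketch of what ``storageutil.hashrevisionsha1``
    # computes (roughly, assuming SHA-1 nodes): the two parent nodes are
    # hashed in sorted order, followed by the text.
    #
    #     hashlib.sha1(min(p1, p2) + max(p1, p2) + text).digest()
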
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if troffset <= self.start(r) + r * self.index.entry_size:
                        trindex = r
                new_dfh.flush()

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()

                # There is a small transactional race here. If the rename of
                # the index fails, we should remove the datafile. It is more
                # important to ensure that the data file is not truncated
                # when the index is replaced as otherwise data is lost.
                tr.replace(self._datafile, self.start(trindex))

                # the temp file replaces the real index when we exit the
                # context manager

                tr.replace(self._indexfile, trindex * self.index.entry_size)
                nodemaputil.setup_persistent_nodemap(tr, self)
                self._chunkclear()

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                new_dfh = None
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except IOError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        dfh = self._datafp(b"w+")
                    transaction.add(self._datafile, dsize)
                if self._sidedatafile is not None:
                    try:
                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
                        sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                    except IOError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
                    transaction.add(
                        self._sidedatafile, self._docket.sidedata_end
                    )

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing references to
                # potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement its transaction
        logic.

        We could also imagine using the same transaction logic for all
        revlogs since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

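    # The write path in one line (informal): ``addrevision`` applies flag
    # processors and hashing to the fulltext, ``addrawrevision`` enters the
    # ``_writing`` context, and ``_addrevision`` picks a delta and appends
    # the index entry and data:
    #
    #     addrevision -> addrawrevision -> _writing -> _addrevision
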
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

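    # Informal summary of the (header, data) contract returned above;
    # ``decompress`` below dispatches on the first byte of the stored chunk:
    #
    #     (b'', compressed) -> engine output, carries its own header byte
    #     (b'', data)       -> raw data that already starts with b'\0'
    #     (b'u', data)      -> stored uncompressed behind a b'u' marker
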
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            h, d = deltainfo.data
            if not h and not d:
                # no data to store at all... declare them uncompressed
                compression_mode = COMP_MODE_PLAIN
            elif not h:
                t = d[0:1]
                if t == b'\0':
                    compression_mode = COMP_MODE_PLAIN
                elif t == self._docket.default_compression_header:
                    compression_mode = COMP_MODE_DEFAULT
            elif h == b'u':
                # we have a more efficient way to declare uncompressed
                h = b''
                compression_mode = COMP_MODE_PLAIN
                deltainfo = deltautil.drop_u_compression(deltainfo)

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

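    # Invariant recap (informal): ``rawtext`` may be None as long as
    # ``cachedelta`` is provided; the fulltext is then only materialized
    # on demand through ``deltacomputer.buildtext(revinfo, fh)``, e.g.
    # when ``alwayscache=True`` asks for the revision cache to be primed.
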
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

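    # Informal illustration of why the docket matters here: with sidedata
    # rewrites, physical order in the data file can diverge from revision
    # order, so ``self.end(prev)`` is only a safe answer for the older
    # layouts.
    #
    #     revlog v0/v1: next write offset == self.end(prev)
    #     revlog v2:    next write offset == self._docket.data_end
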
    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = self._writinghandles[0].tell()
            self._docket.data_end = self._writinghandles[1].tell()
            self._docket.sidedata_end = self._writinghandles[2].tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first
        delta is against its parent, which should be in our log; the rest
        are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement
                        # in a single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to
                    # retrieve the added revision, which will require a call
                    # to revision(). revision() will fast path if there is a
                    # cache hit. So, we tell _addrevision() to always cache in
                    # this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as
                    # raw by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty
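    # Hedged usage sketch (``rl``, ``deltas``, ``cl`` and ``tr`` are
    # hypothetical): each item of ``deltas`` is the 8-tuple unpacked in the
    # loop above, and the callbacks receive this revlog plus the revision
    # number that was added (or found duplicated).
    #
    # >>> added = []
    # >>> rl.addgroup(
    # ...     deltas,      # iterable of (node, p1, p2, linknode, deltabase,
    # ...                  #              delta, flags, sidedata) tuples
    # ...     cl.rev,      # linkmapper: linknode -> linkrev
    # ...     tr,
    # ...     addrevisioncb=lambda rl, rev: added.append(rev),
    # ... )
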
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )
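    # Hedged usage sketch (``rl`` and ``minlink`` are hypothetical): per the
    # docstring above, the caller truncates at the returned rev and takes
    # care of the revs whose linkrevs the strip would otherwise break.
    #
    # >>> striprev, brokenrevs = rl.getstrippoint(minlink)
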
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment.
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]
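    # Worked example of the truncation offsets above (hypothetical numbers):
    # stripping at rev 10 with a 64-byte entry size keeps index entries 0-9,
    # so a non-inline index is cut at 10 * 64 = 640 bytes and the data file
    # at data_end. An inline revlog interleaves index entries and data in
    # one file, hence the cut point data_end + 10 * 64 there.
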
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)
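    # Hedged usage sketch (``rl`` is a hypothetical revlog): a truncated data
    # file shows up as a negative ``dd``, trailing garbage as a positive one.
    #
    # >>> dd, di = rl.checksize()
    # >>> if (dd, di) != (0, 0):
    # ...     print('revlog damaged: data off by %d, index off by %d'
    # ...           % (dd, di))
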
    def files(self):
        res = [self._indexfile]
        if not self._inline:
            res.append(self._datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta
        encoding differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy
        can significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means
        if you are converting a non-generaldelta revlog to a generaldelta
        revlog, deltas will be recomputed if the delta's parent isn't a
        parent of the revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both
        parents for merges. By default, the destination revlog's current
        setting is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase controls whether to reuse a cached
        # delta, if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
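    # Hedged usage sketch (``src``, ``dst`` and ``tr`` are hypothetical
    # source revlog, empty destination revlog and transaction): recompute
    # every delta while copying, e.g. after a delta-algorithm change.
    #
    # >>> src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
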
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            censor.v1_censor(self, tr, censornode, tombstone)
        else:
            # revlog v2
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks that need to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

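    # Hedged usage sketch: callers drive verification with a mutable state
    # dict. The keys shown are the ones read above; the values and the
    # ``report`` helper are hypothetical.
    #
    # >>> state = {b'expectedversion': 1, b'erroroncensored': True}
    # >>> for problem in rl.verifyintegrity(state):
    # ...     report(problem.warning or problem.error)
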
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

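    # Hedged usage sketch (``rl`` is a hypothetical revlog): only the
    # requested keys are present in the result.
    #
    # >>> info = rl.storageinfo(trackedsize=True, storedsize=True)
    # >>> info[b'trackedsize']   # sum of rawsize() over all revisions
    # >>> info[b'storedsize']    # bytes on disk across this revlog's files
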
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
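    # Worked example of the ``new_offset_flags`` bit arithmetic above, with
    # hypothetical values. Note that ``&`` binds tighter than ``|``, so the
    # removal mask only filters the newly added flags; it never clears bits
    # already present in the entry:
    #
    # >>> 0b0100 | 0b0011 & ~0b0001   # entry | (to_add & ~to_remove)
    # 6
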
@@ -1,58 +1,80 @@
# mercurial.revlogutils -- basic utilities for revlog
#
# Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

from ..thirdparty import attr
from ..interfaces import repository

# See mercurial.revlogutils.constants for doc
COMP_MODE_INLINE = 2


def offset_type(offset, type):
    if (type & ~repository.REVISION_FLAGS_KNOWN) != 0:
        raise ValueError(b'unknown revlog index flags: %d' % type)
    return int(int(offset) << 16 | type)


def entry(
    data_offset,
    data_compressed_length,
    data_delta_base,
    link_rev,
    parent_rev_1,
    parent_rev_2,
    node_id,
    flags=0,
    data_uncompressed_length=-1,
    data_compression_mode=COMP_MODE_INLINE,
    sidedata_offset=0,
    sidedata_compressed_length=0,
    sidedata_compression_mode=COMP_MODE_INLINE,
):
    """Build one entry from symbolic name

    This is useful to abstract the actual detail of how we build the entry
    tuple for callers who don't care about it.

    This should always be called using keyword arguments. Some arguments
    have default values; these match the values used by index versions that
    do not store such data.
    """
    return (
        offset_type(data_offset, flags),
        data_compressed_length,
        data_uncompressed_length,
        data_delta_base,
        link_rev,
        parent_rev_1,
        parent_rev_2,
        node_id,
        sidedata_offset,
        sidedata_compressed_length,
        data_compression_mode,
        sidedata_compression_mode,
    )
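# Worked example (hypothetical values): ``offset_type`` packs the data
# offset into the high bits and the 16-bit flag field into the low bits.
# The second call assumes REVIDX_ISCENSORED (1 << 15) is among the known
# revision flags.
#
# >>> offset_type(1024, 0)
# 67108864
# >>> offset_type(1024, 0) >> 16
# 1024
# >>> offset_type(4, 1 << 15) & 0xFFFF
# 32768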


@attr.s(slots=True, frozen=True)
class revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    textlen:    length of the built text
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()
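# Hedged construction sketch (``node``, ``p1``, ``p2``, ``baserev``,
# ``delta`` and ``fulltext_length`` are hypothetical): a revisioninfo built
# from a cached delta leaves the one-element btext cache slot empty.
#
# >>> revinfo = revisioninfo(
# ...     node=node,                  # expected hash of the revision
# ...     p1=p1,
# ...     p2=p2,
# ...     btext=[None],               # fulltext cache, filled lazily
# ...     textlen=fulltext_length,
# ...     cachedelta=(baserev, delta),
# ...     flags=0,
# ... )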
@@ -1,1106 +1,1106 @@
# revlogdeltas.py - Logic around delta computation for revlog
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
# Copyright 2018 Octobus <contact@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""Helper class to compute deltas stored inside revlogs"""

from __future__ import absolute_import

import collections
import struct

# import stuff from node for others to import from revlog
from ..node import nullrev
from ..i18n import _
from ..pycompat import getattr

from .constants import (
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)

from ..thirdparty import attr

from .. import (
    error,
    mdiff,
    util,
)

from . import flagutil

# maximum <delta-chain-data>/<revision-text-length> ratio
LIMIT_DELTA2TEXT = 2


class _testrevlog(object):
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """data is a list of revision payload boundaries"""
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        if rev == nullrev:
            return 0
        if rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        if rev == nullrev:
            return 0
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        if rev == nullrev:
            return True
        return rev in self._snapshot

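# Worked example of the boundary encoding used by ``_testrevlog``: with
# data = [3, 5, 6], revision 0 covers bytes [0, 3), revision 1 covers
# [3, 5) and revision 2 covers [5, 6).
#
# >>> rl = _testrevlog([3, 5, 6])
# >>> (rl.start(1), rl.end(1), rl.length(1))
# (3, 5, 2)
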
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one go.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
    For consistency with other slicing choices, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revision chunks are larger than this limit, they will still
    be emitted individually.

    >>> data = [
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
    for chunk in densityslicing(
        revs, revlog._srdensitythreshold, revlog._srmingapsize
    ):
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk

def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected,
    but that are still too large compared to the read guarantee of the
    revlog. This might happen when the "minimal gap size" interrupted the
    slicing, or when chains are built in a way that creates large blocks
    next to each other.

    >>> data = [
    ... 3, #0 (3)
    ... 5, #1 (2)
    ... 6, #2 (1)
    ... 8, #3 (2)
    ... 8, #4 (empty)
    ... 11, #5 (3)
    ... 12, #6 (1)
    ... 13, #7 (1)
    ... 14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            endrevidx = idx + 1
        else:
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        while span > targetsize:
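            # binary step: drop half of the remaining revisions until the
            # span fits; the guard below protects against a single revision
            # larger than the limit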
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk


def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one go.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ... 5, #00 (5)
    ... 10, #01 (5)
    ... 12, #02 (2)
    ... 12, #03 (empty)
    ... 27, #04 (15)
    ... 31, #05 (4)
    ... 31, #06 (empty)
    ... 42, #07 (11)
    ... 47, #08 (5)
    ... 47, #09 (empty)
    ... 48, #10 (1)
    ... 51, #11 (3)
    ... 74, #12 (23)
    ... 85, #13 (11)
    ... 86, #14 (1)
    ... 91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ... mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ... targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ... targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

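    # If the payload is already dense enough, emit the whole chunk as a
    # single read.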
    if density >= targetdensity:
        yield revs
        return

    # Gather the gaps, to be sorted so we can pop them from largest to
    # smallest
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to smallest
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # skipping this gap reduces the amount of data we would actually read
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk


def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ... 5, #0
    ... 10, #1
    ... 12, #2
    ... 12, #3 (empty)
    ... 17, #4
    ... 21, #5
    ... 21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # If we have a non-empty delta candidate, there is nothing to trim
    if revs[endidx - 1] < len(revlog):
        # Trim empty revs at the end, except the very first revision of a chain
        while (
            endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
        ):
            endidx -= 1

    return revs[startidx:endidx]


def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ... 5, #0
    ... 10, #1
    ... 12, #2
    ... 12, #3 (empty)
    ... 17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    end = revlog.end(revs[-1])
    return end - revlog.start(revs[0])


def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
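    # a delta hunk header is three big-endian int32s: the start and end
    # offsets of the replaced range, and the length of the new data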
    if delta[:hlen] == mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    ):
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh, raw=False)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext


@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()
    snapshotdepth = attr.ib()


def drop_u_compression(delta):
    """turn a "u" (no-compression) delta into a no-compression delta without
    its header

    This is useful for revlog formats that have a better compression method.
    """
    assert delta.data[0] == b'u', delta.data[0]
    return _deltainfo(
        delta.distance,
        delta.deltalen - 1,
        (b'', delta.data[1]),
        delta.base,
        delta.chainbase,
        delta.chainlen,
        delta.compresseddeltalen,
        delta.snapshotdepth,
    )


def isgooddeltainfo(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant."""
    if deltainfo is None:
        return False

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    textlen = revinfo.textlen
    defaultmax = textlen * 4
    maxdist = revlog._maxdeltachainspan
    if not maxdist:
        maxdist = deltainfo.distance  # ensure the conditional passes
    maxdist = max(maxdist, defaultmax)

    # Bad delta from read span:
    #
    #   If the span of data read is larger than the maximum allowed.
    #
    #   In the sparse-revlog case, we rely on the associated "sparse reading"
    #   to avoid issues related to the span of data. In theory, it would be
    #   possible to build a pathological revlog where the delta pattern would
    #   lead to too many reads. However, they do not happen in practice at
    #   all. So we skip the span check entirely.
    if not revlog._sparserevlog and maxdist < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    #   If the delta size is larger than the target text, storing the delta
    #   will be inefficient.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    #   If the sum of the deltas gets larger than K * the target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    #   If the number of deltas in the chain gets too high.
    if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
        return False

    # bad delta from intermediate snapshot size limit
    #
    #   If an intermediate snapshot size is higher than the limit. The limit
    #   exists to prevent endless chains of intermediate deltas from being
    #   created.
    if (
        deltainfo.snapshotdepth is not None
        and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
    ):
        return False

    # bad delta if new intermediate snapshot is larger than the previous
    # snapshot
    if (
        deltainfo.snapshotdepth
        and revlog.length(deltainfo.base) < deltainfo.deltalen
    ):
        return False

    return True


# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
LIMIT_BASE2TEXT = 500


def _candidategroups(revlog, textlen, p1, p2, cachedelta):
    """Provides groups of revisions to be tested as delta bases

    This top level function focuses on emitting groups with unique and
    worthwhile content. See _rawgroups for details about the group order.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog._sparserevlog
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT

    tested = {nullrev}
    candidates = _refinedgroups(revlog, p1, p2, cachedelta)
    while True:
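        # ask the refining generator for the next group of candidates,
        # telling it which base (if any) proved good in the previous round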
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty deltas (no need to include them in a chain)
            while revlog._generaldelta and not (
                rev == nullrev or rev in tested or deltalength(rev)
            ):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # filter out revisions we tested already
            if rev in tested:
                continue
            tested.add(rev)
            # filter out delta bases that will never produce a good delta
            if deltas_limit < revlog.length(rev):
                continue
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                continue
            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # cases; since that computation is cached, it is fine to access
            # it here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if the chain will be too long, skip base
            if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                continue
            # if the chain already has too much data, skip base
            if deltas_limit < chainsize:
                continue
            if sparse and revlog.upperboundcomp is not None:
                maxcomp = revlog.upperboundcomp
                basenotsnap = (p1, p2, nullrev)
                if rev not in basenotsnap and revlog.issnapshot(rev):
                    snapshotdepth = revlog.snapshotdepth(rev)
                    # If text is significantly larger than the base, we can
                    # expect the resulting delta to be proportional to the size
                    # difference
                    revsize = revlog.rawsize(rev)
                    rawsizedistance = max(textlen - revsize, 0)
                    # use an estimate of the compression upper bound.
                    lowestrealisticdeltalen = rawsizedistance // maxcomp

                    # check the absolute constraint on the delta size
                    snapshotlimit = textlen >> snapshotdepth
                    if snapshotlimit < lowestrealisticdeltalen:
                        # delta lower bound is larger than accepted upper bound
                        continue

                    # check the relative constraint on the delta size
                    revlength = revlog.length(rev)
                    if revlength < lowestrealisticdeltalen:
                        # delta probable lower bound is larger than target base
                        continue

            group.append(rev)
        if group:
            # XXX: in the sparse revlog case, group can become large,
            # impacting performances. Some bounding or slicing mechanism
            # would help to reduce this impact.
            good = yield tuple(group)
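    # no more candidate groups to try: signal exhaustion to the caller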
    yield None


def _findsnapshots(revlog, cache, start_rev):
    """find snapshot from start_rev to tip"""
    if util.safehasattr(revlog.index, b'findsnapshots'):
        revlog.index.findsnapshots(cache, start_rev)
    else:
        deltaparent = revlog.deltaparent
        issnapshot = revlog.issnapshot
        for rev in revlog.revs(start_rev):
            if issnapshot(rev):
                cache[deltaparent(rev)].append(rev)


def _refinedgroups(revlog, p1, p2, cachedelta):
    good = None
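    # This generator is driven through send(): the caller sends back the
    # revision it accepted as a delta base (or None), which lets us refine
    # the next suggestions.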
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be
    # disabled through configuration. Disabling source-delta reuse is useful
    # when we want to make sure we recompute "optimal" deltas.
    if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        good = yield (cachedelta[0],)
        if good is not None:
            yield None
            return
    snapshots = collections.defaultdict(list)
    for candidates in _rawgroups(revlog, p1, p2, cachedelta, snapshots):
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up
        if not snapshots:
            _findsnapshots(revlog, snapshots, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshots[good]))
            good = yield children

    # we have found nothing
    yield None


def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
    """Provides groups of revisions to be tested as delta bases

    This lower level function focuses on emitting deltas that are
    theoretically interesting, without looking at any practical details.

    The group order aims at providing fast or small candidates first.
    """
    gdelta = revlog._generaldelta
    # gate sparse behind general-delta because of issue6056
    sparse = gdelta and revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazy tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    if sparse and parents:
        if snapshots is None:
            # map: base-rev: [snapshot-revs]
            snapshots = collections.defaultdict(list)
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshots in the parents' delta chains
        # map: snapshot-level: set of snapshot-revs
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
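        # only consider revisions newer than the oldest level-0 snapshot
        # found in the parents' chains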
        _findsnapshots(revlog, snapshots, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revisions increases the odds
        # that we reuse an independent, yet better, snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily
        # enumerate over the chains. That would be more efficient, however we
        # stick to simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshots that are deeper than the ones from the parent delta
            # chains.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for idx, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < idx:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        # Test them as possible intermediate snapshot bases.
        # We test them from the highest to the lowest level. High level ones
        # are more likely to result in small deltas.
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of the base we consider,
            # would be suitable.
            #
            # It gives a chance to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains,
            # creating more and more snapshots as the repository grows.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before have fewer
                # chances to be valid bases, since our ancestors had to
                # create a new snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the bases from our parents' delta chains.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chains; search if any full
        # snapshots emitted since the parents' bases would be a suitable base
        # for an intermediate snapshot.
        #
        # It gives a chance to reuse a delta chain unrelated to the current
        # revision instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains, creating
        # more and more snapshots as the repository grows.
        yield tuple(snapshots[nullrev])

    if not sparse:
        # other approaches failed, try against prev to hopefully save us a
        # fulltext.
        yield (prev,)


class deltacomputer(object):
    def __init__(self, revlog):
        self.revlog = revlog

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: revisioninfo instance that contains all needed info
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        fulltext = btext[0] = _textfromdelta(
            fh,
            revlog,
            baserev,
            delta,
            revinfo.p1,
            revinfo.p2,
            revinfo.flags,
            revinfo.node,
        )
        return fulltext

    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.rawdata(base, _df=fh)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        revlog = self.revlog
        chainbase = revlog.chainbase(base)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        snapshotdepth = None
        if revlog._sparserevlog and deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])
        delta = None
        if revinfo.cachedelta:
            cachebase, cachediff = revinfo.cachedelta
            # check if the diff still applies
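            # an empty delta on top of ``base`` is equivalent to ``base``
            # itself, so walk up through empty deltas from the cached base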
            currentbase = cachebase
            while (
                currentbase != nullrev
                and currentbase != base
                and self.revlog.length(currentbase) == 0
            ):
                currentbase = self.revlog.deltaparent(currentbase)
            if self.revlog._lazydelta and currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        # snapshotdepth needs to be neither None nor 0 (a level-0 snapshot)
        if revlog.upperboundcomp is not None and snapshotdepth:
            lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
            snapshotlimit = revinfo.textlen >> snapshotdepth
            if snapshotlimit < lowestrealisticdeltalen:
                return None
            if revlog.length(base) < lowestrealisticdeltalen:
                return None
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        return _deltainfo(
            dist,
            deltalen,
            (header, data),
            deltabase,
            chainbase,
            chainlen,
            compresseddeltalen,
            snapshotdepth,
        )

    def _fullsnapshotinfo(self, fh, revinfo):
        curr = len(self.revlog)
        rawtext = self.buildtext(revinfo, fh)
        data = self.revlog.compress(rawtext)
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
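        # a full snapshot has no delta chain: the distance, the delta
        # length, and the cumulative chain size all equal the compressed
        # text size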
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(
            dist,
            deltalen,
            data,
            deltabase,
            chainbase,
            chainlen,
            compresseddeltalen,
            snapshotdepth,
        )

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of revisioninfo)
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.
        """
        if not revinfo.textlen:
            return self._fullsnapshotinfo(fh, revinfo)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return self._fullsnapshotinfo(fh, revinfo)

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)
        groups = _candidategroups(
            self.revlog, revinfo.textlen, p1r, p2r, cachedelta
        )
        candidaterevs = next(groups)
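        # try each group of candidate bases, feeding the best base found so
        # far back into the generator so it can refine its next proposals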
        while candidaterevs is not None:
            nominateddeltas = []
            if deltainfo is not None:
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if candidatedelta is not None:
                    if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
                        nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            deltainfo = self._fullsnapshotinfo(fh, revinfo)
        return deltainfo