revlog: introduce a plain compression mode...
marmoute
r48027:b876f0bf default
@@ -1,3328 +1,3360 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,  # added by this change
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    revlogv0,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes complaints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)
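
# Illustrative note (not part of the original change): the packed field keeps
# the offset in the high bits and the flags in the low 16 bits, so both values
# can be recovered independently:
#
#   field = offset_type(4096, 0)   # 4096 << 16 | 0 == 268435456
#   offset = field >> 16           # -> 4096
#   flags = field & 0xFFFF         # -> 0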


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).


    Internal details
    ----------------

    A large part of the revlog logic deals with revisions' "index entries",
    tuple objects that contain the same "items" whatever the revlog version.
    Different versions will have different ways of storing these items
    (sometimes not having them at all), but the tuple will always be the same.
    New fields are usually added at the end to avoid breaking existing code
    that relies on the existing order. The fields are defined as follows:

    [0] offset:
        The byte index of the start of revision data chunk.
        That value is shifted up by 16 bits. Use "offset = field >> 16" to
        retrieve it.

        flags:
            A flag field that carries special information or changes the behavior
            of the revision. (see `REVIDX_*` constants for details)
            The flag field only occupies the first 16 bits of this field,
            use "flags = field & 0xFFFF" to retrieve the value.

    [1] compressed length:
        The size, in bytes, of the chunk on disk

    [2] uncompressed length:
        The size, in bytes, of the full revision once reconstructed.

    [3] base rev:
        Either the base of the revision delta chain (without general
        delta), or the base of the delta (stored in the data chunk)
        with general delta.

    [4] link rev:
        Changelog revision number of the changeset introducing this
        revision.

    [5] parent 1 rev:
        Revision number of the first parent

    [6] parent 2 rev:
        Revision number of the second parent

    [7] node id:
        The node id of the current revision

    [8] sidedata offset:
        The byte index of the start of the revision's side-data chunk.

    [9] sidedata chunk length:
        The size, in bytes, of the revision's side-data chunk.

    [10] data compression mode:
        two bits that detail the way the data chunk is compressed on disk.
        (see "COMP_MODE_*" constants for details). For revlog version 0 and
        1 this will always be COMP_MODE_INLINE.

    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code understand what the revlog
        is about without having to resort to heuristics and index filename
        analysis. Note that this must reliably be set by normal code, but
        test, debug, or performance measurement code might not set it to an
        accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    def _init_opts(self):
        """process options (from above/config) to set up the associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'revlogv2' in opts:
            new_header = REVLOGV2 | FLAG_INLINE_DATA
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
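        # Note (added for clarity): ``x & (x - 1)`` clears the lowest set bit,
        # so the expression above is non-zero exactly when ``x`` is not a
        # power of two (65536 & 65535 == 0, but 65537 & 65536 != 0).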
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return the file content, read with or without mmap

        If the file is missing, return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''
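
    # Illustrative usage (the values are assumptions for the example): with
    # ``mmap_threshold=65536``, a 1 MiB index file is mmapped while a 4 KiB
    # one is read into memory outright:
    #
    #   data = rl._get_data(rl._indexfile, 65536)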

    def _loadindex(self):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        entry_data = b''
        self._initempty = True
        entry_data = self._get_data(entry_point, mmapindexthreshold)
        if len(entry_data) > 0:
            header = INDEX_HEADER.unpack(entry_data[:4])[0]
            self._initempty = False
        else:
            header = new_header

        self._format_flags = header & ~0xFFFF
        self._format_version = header & 0xFFFF
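        # e.g. (illustrative): a v1 inline revlog stores header 0x00010001,
        # i.e. FLAG_INLINE_DATA (1 << 16) in the high half and version 1 in
        # the low half.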

        supported_flags = SUPPORTED_FLAGS.get(self._format_version)
        if supported_flags is None:
            msg = _(b'unknown version (%d) in revlog %s')
            msg %= (self._format_version, self.display_id)
            raise error.RevlogError(msg)
        elif self._format_flags & ~supported_flags:
            msg = _(b'unknown flags (%#04x) in version %d revlog %s')
            display_flag = self._format_flags >> 16
            msg %= (display_flag, self._format_version, self.display_id)
            raise error.RevlogError(msg)

        features = FEATURES_BY_VERSION[self._format_version]
        self._inline = features[b'inline'](self._format_flags)
        self._generaldelta = features[b'generaldelta'](self._format_flags)
        self.hassidedata = features[b'sidedata']

        if not features[b'docket']:
            self._indexfile = entry_point
            index_data = entry_data
        else:
            self._docket_file = entry_point
            if self._initempty:
                self._docket = docketutil.default_docket(self, header)
            else:
                self._docket = docketutil.parse_docket(
                    self, entry_data, use_pending=self._trypending
                )
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        # Maybe we should build a user facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly; use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from an inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket, so
        # we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
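            # (note added for clarity) the XOR drops ELLIPSIS from the
            # known-flags mask, so this checks that no flag other than
            # (possibly) ELLIPSIS is set.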
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base
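
    # e.g. (illustrative): with general delta, if rev 7 stores base rev 5 in
    # its entry and rev 5 stores itself as base, the walk above ends at 5,
    # which is cached and returned as the chain base of rev 7.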

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise
        if entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        # (d[5] is a rev number, so it must be compared to nullrev, not nullid)
        if d[5] == nullrev:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r
977
978
978 def _deltachain(self, rev, stoprev=None):
979 def _deltachain(self, rev, stoprev=None):
979 """Obtain the delta chain for a revision.
980 """Obtain the delta chain for a revision.
980
981
981 ``stoprev`` specifies a revision to stop at. If not specified, we
982 ``stoprev`` specifies a revision to stop at. If not specified, we
982 stop at the base of the chain.
983 stop at the base of the chain.
983
984
984 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
985 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
985 revs in ascending order and ``stopped`` is a bool indicating whether
986 revs in ascending order and ``stopped`` is a bool indicating whether
986 ``stoprev`` was hit.
987 ``stoprev`` was hit.
987 """
988 """
988 # Try C implementation.
989 # Try C implementation.
989 try:
990 try:
990 return self.index.deltachain(rev, stoprev, self._generaldelta)
991 return self.index.deltachain(rev, stoprev, self._generaldelta)
991 except AttributeError:
992 except AttributeError:
992 pass
993 pass
993
994
994 chain = []
995 chain = []
995
996
996 # Alias to prevent attribute lookup in tight loop.
997 # Alias to prevent attribute lookup in tight loop.
997 index = self.index
998 index = self.index
998 generaldelta = self._generaldelta
999 generaldelta = self._generaldelta
999
1000
1000 iterrev = rev
1001 iterrev = rev
1001 e = index[iterrev]
1002 e = index[iterrev]
1002 while iterrev != e[3] and iterrev != stoprev:
1003 while iterrev != e[3] and iterrev != stoprev:
1003 chain.append(iterrev)
1004 chain.append(iterrev)
1004 if generaldelta:
1005 if generaldelta:
1005 iterrev = e[3]
1006 iterrev = e[3]
1006 else:
1007 else:
1007 iterrev -= 1
1008 iterrev -= 1
1008 e = index[iterrev]
1009 e = index[iterrev]
1009
1010
1010 if iterrev == stoprev:
1011 if iterrev == stoprev:
1011 stopped = True
1012 stopped = True
1012 else:
1013 else:
1013 chain.append(iterrev)
1014 chain.append(iterrev)
1014 stopped = False
1015 stopped = False
1015
1016
1016 chain.reverse()
1017 chain.reverse()
1017 return chain, stopped
1018 return chain, stopped
1018
1019
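# The pure-Python fallback of _deltachain() above, reduced to a sketch:
# ``bases`` is a hypothetical {rev: deltabase} mapping, and generaldelta
# decides whether the next link is the stored base or simply rev - 1.

def toy_deltachain(bases, rev, generaldelta, stoprev=None):
    chain = []
    base = bases[rev]
    while rev != base and rev != stoprev:
        chain.append(rev)
        rev = base if generaldelta else rev - 1
        base = bases[rev]
    stopped = rev == stoprev
    if not stopped:
        chain.append(rev)
    chain.reverse()
    return chain, stopped

bases = {0: 0, 1: 0, 2: 1, 3: 1}
assert toy_deltachain(bases, 3, generaldelta=True) == ([0, 1, 3], False)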
1019 def ancestors(self, revs, stoprev=0, inclusive=False):
1020 def ancestors(self, revs, stoprev=0, inclusive=False):
1020 """Generate the ancestors of 'revs' in reverse revision order.
1021 """Generate the ancestors of 'revs' in reverse revision order.
1021 Does not generate revs lower than stoprev.
1022 Does not generate revs lower than stoprev.
1022
1023
1023 See the documentation for ancestor.lazyancestors for more details."""
1024 See the documentation for ancestor.lazyancestors for more details."""
1024
1025
1025 # first, make sure start revisions aren't filtered
1026 # first, make sure start revisions aren't filtered
1026 revs = list(revs)
1027 revs = list(revs)
1027 checkrev = self.node
1028 checkrev = self.node
1028 for r in revs:
1029 for r in revs:
1029 checkrev(r)
1030 checkrev(r)
1030 # and we're sure ancestors aren't filtered as well
1031 # and we're sure ancestors aren't filtered as well
1031
1032
1032 if rustancestor is not None:
1033 if rustancestor is not None:
1033 lazyancestors = rustancestor.LazyAncestors
1034 lazyancestors = rustancestor.LazyAncestors
1034 arg = self.index
1035 arg = self.index
1035 else:
1036 else:
1036 lazyancestors = ancestor.lazyancestors
1037 lazyancestors = ancestor.lazyancestors
1037 arg = self._uncheckedparentrevs
1038 arg = self._uncheckedparentrevs
1038 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1039 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1039
1040
1040 def descendants(self, revs):
1041 def descendants(self, revs):
1041 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1042 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1042
1043
1043 def findcommonmissing(self, common=None, heads=None):
1044 def findcommonmissing(self, common=None, heads=None):
1044 """Return a tuple of the ancestors of common and the ancestors of heads
1045 """Return a tuple of the ancestors of common and the ancestors of heads
1045 that are not ancestors of common. In revset terminology, we return the
1046 that are not ancestors of common. In revset terminology, we return the
1046 tuple:
1047 tuple:
1047
1048
1048 ::common, (::heads) - (::common)
1049 ::common, (::heads) - (::common)
1049
1050
1050 The list is sorted by revision number, meaning it is
1051 The list is sorted by revision number, meaning it is
1051 topologically sorted.
1052 topologically sorted.
1052
1053
1053 'heads' and 'common' are both lists of node IDs. If heads is
1054 'heads' and 'common' are both lists of node IDs. If heads is
1054 not supplied, uses all of the revlog's heads. If common is not
1055 not supplied, uses all of the revlog's heads. If common is not
1055 supplied, uses nullid."""
1056 supplied, uses nullid."""
1056 if common is None:
1057 if common is None:
1057 common = [self.nullid]
1058 common = [self.nullid]
1058 if heads is None:
1059 if heads is None:
1059 heads = self.heads()
1060 heads = self.heads()
1060
1061
1061 common = [self.rev(n) for n in common]
1062 common = [self.rev(n) for n in common]
1062 heads = [self.rev(n) for n in heads]
1063 heads = [self.rev(n) for n in heads]
1063
1064
1064 # we want the ancestors, but inclusive
1065 # we want the ancestors, but inclusive
1065 class lazyset(object):
1066 class lazyset(object):
1066 def __init__(self, lazyvalues):
1067 def __init__(self, lazyvalues):
1067 self.addedvalues = set()
1068 self.addedvalues = set()
1068 self.lazyvalues = lazyvalues
1069 self.lazyvalues = lazyvalues
1069
1070
1070 def __contains__(self, value):
1071 def __contains__(self, value):
1071 return value in self.addedvalues or value in self.lazyvalues
1072 return value in self.addedvalues or value in self.lazyvalues
1072
1073
1073 def __iter__(self):
1074 def __iter__(self):
1074 added = self.addedvalues
1075 added = self.addedvalues
1075 for r in added:
1076 for r in added:
1076 yield r
1077 yield r
1077 for r in self.lazyvalues:
1078 for r in self.lazyvalues:
1078 if r not in added:
1079 if r not in added:
1079 yield r
1080 yield r
1080
1081
1081 def add(self, value):
1082 def add(self, value):
1082 self.addedvalues.add(value)
1083 self.addedvalues.add(value)
1083
1084
1084 def update(self, values):
1085 def update(self, values):
1085 self.addedvalues.update(values)
1086 self.addedvalues.update(values)
1086
1087
1087 has = lazyset(self.ancestors(common))
1088 has = lazyset(self.ancestors(common))
1088 has.add(nullrev)
1089 has.add(nullrev)
1089 has.update(common)
1090 has.update(common)
1090
1091
1091 # take all ancestors from heads that aren't in has
1092 # take all ancestors from heads that aren't in has
1092 missing = set()
1093 missing = set()
1093 visit = collections.deque(r for r in heads if r not in has)
1094 visit = collections.deque(r for r in heads if r not in has)
1094 while visit:
1095 while visit:
1095 r = visit.popleft()
1096 r = visit.popleft()
1096 if r in missing:
1097 if r in missing:
1097 continue
1098 continue
1098 else:
1099 else:
1099 missing.add(r)
1100 missing.add(r)
1100 for p in self.parentrevs(r):
1101 for p in self.parentrevs(r):
1101 if p not in has:
1102 if p not in has:
1102 visit.append(p)
1103 visit.append(p)
1103 missing = list(missing)
1104 missing = list(missing)
1104 missing.sort()
1105 missing.sort()
1105 return has, [self.node(miss) for miss in missing]
1106 return has, [self.node(miss) for miss in missing]
1106
1107
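# The BFS above in miniature: ``parents`` is a toy {rev: [parentrevs]} DAG,
# ``has`` is the inclusive ancestor set of common, and everything reachable
# from heads without entering ``has`` is missing.

import collections

def toy_missing(parents, has, heads):
    missing = set()
    visit = collections.deque(r for r in heads if r not in has)
    while visit:
        r = visit.popleft()
        if r in missing:
            continue
        missing.add(r)
        for p in parents[r]:
            if p not in has:
                visit.append(p)
    return sorted(missing)

parents = {0: [], 1: [0], 2: [1], 3: [1]}
assert toy_missing(parents, has={0, 1}, heads=[2, 3]) == [2, 3]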
1107 def incrementalmissingrevs(self, common=None):
1108 def incrementalmissingrevs(self, common=None):
1108 """Return an object that can be used to incrementally compute the
1109 """Return an object that can be used to incrementally compute the
1109 revision numbers of the ancestors of arbitrary sets that are not
1110 revision numbers of the ancestors of arbitrary sets that are not
1110 ancestors of common. This is an ancestor.incrementalmissingancestors
1111 ancestors of common. This is an ancestor.incrementalmissingancestors
1111 object.
1112 object.
1112
1113
1113 'common' is a list of revision numbers. If common is not supplied, uses
1114 'common' is a list of revision numbers. If common is not supplied, uses
1114 nullrev.
1115 nullrev.
1115 """
1116 """
1116 if common is None:
1117 if common is None:
1117 common = [nullrev]
1118 common = [nullrev]
1118
1119
1119 if rustancestor is not None:
1120 if rustancestor is not None:
1120 return rustancestor.MissingAncestors(self.index, common)
1121 return rustancestor.MissingAncestors(self.index, common)
1121 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1122 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1122
1123
1123 def findmissingrevs(self, common=None, heads=None):
1124 def findmissingrevs(self, common=None, heads=None):
1124 """Return the revision numbers of the ancestors of heads that
1125 """Return the revision numbers of the ancestors of heads that
1125 are not ancestors of common.
1126 are not ancestors of common.
1126
1127
1127 More specifically, return a list of revision numbers corresponding to
1128 More specifically, return a list of revision numbers corresponding to
1128 nodes N such that every N satisfies the following constraints:
1129 nodes N such that every N satisfies the following constraints:
1129
1130
1130 1. N is an ancestor of some node in 'heads'
1131 1. N is an ancestor of some node in 'heads'
1131 2. N is not an ancestor of any node in 'common'
1132 2. N is not an ancestor of any node in 'common'
1132
1133
1133 The list is sorted by revision number, meaning it is
1134 The list is sorted by revision number, meaning it is
1134 topologically sorted.
1135 topologically sorted.
1135
1136
1136 'heads' and 'common' are both lists of revision numbers. If heads is
1137 'heads' and 'common' are both lists of revision numbers. If heads is
1137 not supplied, uses all of the revlog's heads. If common is not
1138 not supplied, uses all of the revlog's heads. If common is not
1138 supplied, uses nullid."""
1139 supplied, uses nullid."""
1139 if common is None:
1140 if common is None:
1140 common = [nullrev]
1141 common = [nullrev]
1141 if heads is None:
1142 if heads is None:
1142 heads = self.headrevs()
1143 heads = self.headrevs()
1143
1144
1144 inc = self.incrementalmissingrevs(common=common)
1145 inc = self.incrementalmissingrevs(common=common)
1145 return inc.missingancestors(heads)
1146 return inc.missingancestors(heads)
1146
1147
1147 def findmissing(self, common=None, heads=None):
1148 def findmissing(self, common=None, heads=None):
1148 """Return the ancestors of heads that are not ancestors of common.
1149 """Return the ancestors of heads that are not ancestors of common.
1149
1150
1150 More specifically, return a list of nodes N such that every N
1151 More specifically, return a list of nodes N such that every N
1151 satisfies the following constraints:
1152 satisfies the following constraints:
1152
1153
1153 1. N is an ancestor of some node in 'heads'
1154 1. N is an ancestor of some node in 'heads'
1154 2. N is not an ancestor of any node in 'common'
1155 2. N is not an ancestor of any node in 'common'
1155
1156
1156 The list is sorted by revision number, meaning it is
1157 The list is sorted by revision number, meaning it is
1157 topologically sorted.
1158 topologically sorted.
1158
1159
1159 'heads' and 'common' are both lists of node IDs. If heads is
1160 'heads' and 'common' are both lists of node IDs. If heads is
1160 not supplied, uses all of the revlog's heads. If common is not
1161 not supplied, uses all of the revlog's heads. If common is not
1161 supplied, uses nullid."""
1162 supplied, uses nullid."""
1162 if common is None:
1163 if common is None:
1163 common = [self.nullid]
1164 common = [self.nullid]
1164 if heads is None:
1165 if heads is None:
1165 heads = self.heads()
1166 heads = self.heads()
1166
1167
1167 common = [self.rev(n) for n in common]
1168 common = [self.rev(n) for n in common]
1168 heads = [self.rev(n) for n in heads]
1169 heads = [self.rev(n) for n in heads]
1169
1170
1170 inc = self.incrementalmissingrevs(common=common)
1171 inc = self.incrementalmissingrevs(common=common)
1171 return [self.node(r) for r in inc.missingancestors(heads)]
1172 return [self.node(r) for r in inc.missingancestors(heads)]
1172
1173
1173 def nodesbetween(self, roots=None, heads=None):
1174 def nodesbetween(self, roots=None, heads=None):
1174 """Return a topological path from 'roots' to 'heads'.
1175 """Return a topological path from 'roots' to 'heads'.
1175
1176
1176 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1177 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1177 topologically sorted list of all nodes N that satisfy both of
1178 topologically sorted list of all nodes N that satisfy both of
1178 these constraints:
1179 these constraints:
1179
1180
1180 1. N is a descendant of some node in 'roots'
1181 1. N is a descendant of some node in 'roots'
1181 2. N is an ancestor of some node in 'heads'
1182 2. N is an ancestor of some node in 'heads'
1182
1183
1183 Every node is considered to be both a descendant and an ancestor
1184 Every node is considered to be both a descendant and an ancestor
1184 of itself, so every reachable node in 'roots' and 'heads' will be
1185 of itself, so every reachable node in 'roots' and 'heads' will be
1185 included in 'nodes'.
1186 included in 'nodes'.
1186
1187
1187 'outroots' is the list of reachable nodes in 'roots', i.e., the
1188 'outroots' is the list of reachable nodes in 'roots', i.e., the
1188 subset of 'roots' that is returned in 'nodes'. Likewise,
1189 subset of 'roots' that is returned in 'nodes'. Likewise,
1189 'outheads' is the subset of 'heads' that is also in 'nodes'.
1190 'outheads' is the subset of 'heads' that is also in 'nodes'.
1190
1191
1191 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1192 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1192 unspecified, uses nullid as the only root. If 'heads' is
1193 unspecified, uses nullid as the only root. If 'heads' is
1193 unspecified, uses the list of all of the revlog's heads."""
1194 unspecified, uses the list of all of the revlog's heads."""
1194 nonodes = ([], [], [])
1195 nonodes = ([], [], [])
1195 if roots is not None:
1196 if roots is not None:
1196 roots = list(roots)
1197 roots = list(roots)
1197 if not roots:
1198 if not roots:
1198 return nonodes
1199 return nonodes
1199 lowestrev = min([self.rev(n) for n in roots])
1200 lowestrev = min([self.rev(n) for n in roots])
1200 else:
1201 else:
1201 roots = [self.nullid] # Everybody's a descendant of nullid
1202 roots = [self.nullid] # Everybody's a descendant of nullid
1202 lowestrev = nullrev
1203 lowestrev = nullrev
1203 if (lowestrev == nullrev) and (heads is None):
1204 if (lowestrev == nullrev) and (heads is None):
1204 # We want _all_ the nodes!
1205 # We want _all_ the nodes!
1205 return (
1206 return (
1206 [self.node(r) for r in self],
1207 [self.node(r) for r in self],
1207 [self.nullid],
1208 [self.nullid],
1208 list(self.heads()),
1209 list(self.heads()),
1209 )
1210 )
1210 if heads is None:
1211 if heads is None:
1211 # All nodes are ancestors, so the latest ancestor is the last
1212 # All nodes are ancestors, so the latest ancestor is the last
1212 # node.
1213 # node.
1213 highestrev = len(self) - 1
1214 highestrev = len(self) - 1
1214 # Set ancestors to None to signal that every node is an ancestor.
1215 # Set ancestors to None to signal that every node is an ancestor.
1215 ancestors = None
1216 ancestors = None
1216 # Set heads to an empty dictionary for later discovery of heads
1217 # Set heads to an empty dictionary for later discovery of heads
1217 heads = {}
1218 heads = {}
1218 else:
1219 else:
1219 heads = list(heads)
1220 heads = list(heads)
1220 if not heads:
1221 if not heads:
1221 return nonodes
1222 return nonodes
1222 ancestors = set()
1223 ancestors = set()
1223 # Turn heads into a dictionary so we can remove 'fake' heads.
1224 # Turn heads into a dictionary so we can remove 'fake' heads.
1224 # Also, later we will be using it to filter out the heads we can't
1225 # Also, later we will be using it to filter out the heads we can't
1225 # find from roots.
1226 # find from roots.
1226 heads = dict.fromkeys(heads, False)
1227 heads = dict.fromkeys(heads, False)
1227 # Start at the top and keep marking parents until we're done.
1228 # Start at the top and keep marking parents until we're done.
1228 nodestotag = set(heads)
1229 nodestotag = set(heads)
1229 # Remember where the top was so we can use it as a limit later.
1230 # Remember where the top was so we can use it as a limit later.
1230 highestrev = max([self.rev(n) for n in nodestotag])
1231 highestrev = max([self.rev(n) for n in nodestotag])
1231 while nodestotag:
1232 while nodestotag:
1232 # grab a node to tag
1233 # grab a node to tag
1233 n = nodestotag.pop()
1234 n = nodestotag.pop()
1234 # Never tag nullid
1235 # Never tag nullid
1235 if n == self.nullid:
1236 if n == self.nullid:
1236 continue
1237 continue
1237 # A node's revision number represents its place in a
1238 # A node's revision number represents its place in a
1238 # topologically sorted list of nodes.
1239 # topologically sorted list of nodes.
1239 r = self.rev(n)
1240 r = self.rev(n)
1240 if r >= lowestrev:
1241 if r >= lowestrev:
1241 if n not in ancestors:
1242 if n not in ancestors:
1242 # If we are possibly a descendant of one of the roots
1243 # If we are possibly a descendant of one of the roots
1243 # and we haven't already been marked as an ancestor
1244 # and we haven't already been marked as an ancestor
1244 ancestors.add(n) # Mark as ancestor
1245 ancestors.add(n) # Mark as ancestor
1245 # Add non-nullid parents to list of nodes to tag.
1246 # Add non-nullid parents to list of nodes to tag.
1246 nodestotag.update(
1247 nodestotag.update(
1247 [p for p in self.parents(n) if p != self.nullid]
1248 [p for p in self.parents(n) if p != self.nullid]
1248 )
1249 )
1249 elif n in heads: # We've seen it before, is it a fake head?
1250 elif n in heads: # We've seen it before, is it a fake head?
1250 # So it is, real heads should not be the ancestors of
1251 # So it is, real heads should not be the ancestors of
1251 # any other heads.
1252 # any other heads.
1252 heads.pop(n)
1253 heads.pop(n)
1253 if not ancestors:
1254 if not ancestors:
1254 return nonodes
1255 return nonodes
1255 # Now that we have our set of ancestors, we want to remove any
1256 # Now that we have our set of ancestors, we want to remove any
1256 # roots that are not ancestors.
1257 # roots that are not ancestors.
1257
1258
1258 # If one of the roots was nullid, everything is included anyway.
1259 # If one of the roots was nullid, everything is included anyway.
1259 if lowestrev > nullrev:
1260 if lowestrev > nullrev:
1260 # But, since we weren't, let's recompute the lowest rev to not
1261 # But, since we weren't, let's recompute the lowest rev to not
1261 # include roots that aren't ancestors.
1262 # include roots that aren't ancestors.
1262
1263
1263 # Filter out roots that aren't ancestors of heads
1264 # Filter out roots that aren't ancestors of heads
1264 roots = [root for root in roots if root in ancestors]
1265 roots = [root for root in roots if root in ancestors]
1265 # Recompute the lowest revision
1266 # Recompute the lowest revision
1266 if roots:
1267 if roots:
1267 lowestrev = min([self.rev(root) for root in roots])
1268 lowestrev = min([self.rev(root) for root in roots])
1268 else:
1269 else:
1269 # No more roots? Return empty list
1270 # No more roots? Return empty list
1270 return nonodes
1271 return nonodes
1271 else:
1272 else:
1272 # We are descending from nullid, and don't need to care about
1273 # We are descending from nullid, and don't need to care about
1273 # any other roots.
1274 # any other roots.
1274 lowestrev = nullrev
1275 lowestrev = nullrev
1275 roots = [self.nullid]
1276 roots = [self.nullid]
1276 # Transform our roots list into a set.
1277 # Transform our roots list into a set.
1277 descendants = set(roots)
1278 descendants = set(roots)
1278 # Also, keep the original roots so we can filter out roots that aren't
1279 # Also, keep the original roots so we can filter out roots that aren't
1279 # 'real' roots (i.e. are descended from other roots).
1280 # 'real' roots (i.e. are descended from other roots).
1280 roots = descendants.copy()
1281 roots = descendants.copy()
1281 # Our topologically sorted list of output nodes.
1282 # Our topologically sorted list of output nodes.
1282 orderedout = []
1283 orderedout = []
1283 # Don't start at nullid since we don't want nullid in our output list,
1284 # Don't start at nullid since we don't want nullid in our output list,
1284 # and if nullid shows up in descendants, empty parents will look like
1285 # and if nullid shows up in descendants, empty parents will look like
1285 # they're descendants.
1286 # they're descendants.
1286 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1287 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1287 n = self.node(r)
1288 n = self.node(r)
1288 isdescendant = False
1289 isdescendant = False
1289 if lowestrev == nullrev: # Everybody is a descendant of nullid
1290 if lowestrev == nullrev: # Everybody is a descendant of nullid
1290 isdescendant = True
1291 isdescendant = True
1291 elif n in descendants:
1292 elif n in descendants:
1292 # n is already a descendant
1293 # n is already a descendant
1293 isdescendant = True
1294 isdescendant = True
1294 # This check only needs to be done here because all the roots
1295 # This check only needs to be done here because all the roots
1295 # will start being marked as descendants before the loop.
1296 # will start being marked as descendants before the loop.
1296 if n in roots:
1297 if n in roots:
1297 # If n was a root, check if it's a 'real' root.
1298 # If n was a root, check if it's a 'real' root.
1298 p = tuple(self.parents(n))
1299 p = tuple(self.parents(n))
1299 # If any of its parents are descendants, it's not a root.
1300 # If any of its parents are descendants, it's not a root.
1300 if (p[0] in descendants) or (p[1] in descendants):
1301 if (p[0] in descendants) or (p[1] in descendants):
1301 roots.remove(n)
1302 roots.remove(n)
1302 else:
1303 else:
1303 p = tuple(self.parents(n))
1304 p = tuple(self.parents(n))
1304 # A node is a descendant if either of its parents are
1305 # A node is a descendant if either of its parents are
1305 # descendants. (We seeded the descendants list with the roots
1306 # descendants. (We seeded the descendants list with the roots
1306 # up there, remember?)
1307 # up there, remember?)
1307 if (p[0] in descendants) or (p[1] in descendants):
1308 if (p[0] in descendants) or (p[1] in descendants):
1308 descendants.add(n)
1309 descendants.add(n)
1309 isdescendant = True
1310 isdescendant = True
1310 if isdescendant and ((ancestors is None) or (n in ancestors)):
1311 if isdescendant and ((ancestors is None) or (n in ancestors)):
1311 # Only include nodes that are both descendants and ancestors.
1312 # Only include nodes that are both descendants and ancestors.
1312 orderedout.append(n)
1313 orderedout.append(n)
1313 if (ancestors is not None) and (n in heads):
1314 if (ancestors is not None) and (n in heads):
1314 # We're trying to figure out which heads are reachable
1315 # We're trying to figure out which heads are reachable
1315 # from roots.
1316 # from roots.
1316 # Mark this head as having been reached
1317 # Mark this head as having been reached
1317 heads[n] = True
1318 heads[n] = True
1318 elif ancestors is None:
1319 elif ancestors is None:
1319 # Otherwise, we're trying to discover the heads.
1320 # Otherwise, we're trying to discover the heads.
1320 # Assume this is a head because if it isn't, the next step
1321 # Assume this is a head because if it isn't, the next step
1321 # will eventually remove it.
1322 # will eventually remove it.
1322 heads[n] = True
1323 heads[n] = True
1323 # But, obviously its parents aren't.
1324 # But, obviously its parents aren't.
1324 for p in self.parents(n):
1325 for p in self.parents(n):
1325 heads.pop(p, None)
1326 heads.pop(p, None)
1326 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1327 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1327 roots = list(roots)
1328 roots = list(roots)
1328 assert orderedout
1329 assert orderedout
1329 assert roots
1330 assert roots
1330 assert heads
1331 assert heads
1331 return (orderedout, roots, heads)
1332 return (orderedout, roots, heads)
1332
1333
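# Conceptually, nodesbetween() computes descendants(roots) & ancestors(heads)
# in revision-number (hence topological) order. A brute-force sketch over a
# toy {rev: [parentrevs]} graph; like the real revlog, it relies on revs
# being topologically numbered.

def toy_nodesbetween(parents, roots, heads):
    # ancestors of heads, inclusive
    anc = set(heads)
    stack = list(heads)
    while stack:
        for p in parents[stack.pop()]:
            if p not in anc:
                anc.add(p)
                stack.append(p)
    # descendants of roots, inclusive, discovered in topological order
    desc = set(roots)
    for r in sorted(parents):
        if any(p in desc for p in parents[r]):
            desc.add(r)
    return [r for r in sorted(parents) if r in anc and r in desc]

parents = {0: [], 1: [0], 2: [1], 3: [2], 4: [1]}
assert toy_nodesbetween(parents, roots={1}, heads=[3]) == [1, 2, 3]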
1333 def headrevs(self, revs=None):
1334 def headrevs(self, revs=None):
1334 if revs is None:
1335 if revs is None:
1335 try:
1336 try:
1336 return self.index.headrevs()
1337 return self.index.headrevs()
1337 except AttributeError:
1338 except AttributeError:
1338 return self._headrevs()
1339 return self._headrevs()
1339 if rustdagop is not None:
1340 if rustdagop is not None:
1340 return rustdagop.headrevs(self.index, revs)
1341 return rustdagop.headrevs(self.index, revs)
1341 return dagop.headrevs(revs, self._uncheckedparentrevs)
1342 return dagop.headrevs(revs, self._uncheckedparentrevs)
1342
1343
1343 def computephases(self, roots):
1344 def computephases(self, roots):
1344 return self.index.computephasesmapsets(roots)
1345 return self.index.computephasesmapsets(roots)
1345
1346
1346 def _headrevs(self):
1347 def _headrevs(self):
1347 count = len(self)
1348 count = len(self)
1348 if not count:
1349 if not count:
1349 return [nullrev]
1350 return [nullrev]
1350 # we won't iterate over filtered revs, so nobody is a head at the start
1351 # we won't iterate over filtered revs, so nobody is a head at the start
1351 ishead = [0] * (count + 1)
1352 ishead = [0] * (count + 1)
1352 index = self.index
1353 index = self.index
1353 for r in self:
1354 for r in self:
1354 ishead[r] = 1 # I may be a head
1355 ishead[r] = 1 # I may be a head
1355 e = index[r]
1356 e = index[r]
1356 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1357 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1357 return [r for r, val in enumerate(ishead) if val]
1358 return [r for r, val in enumerate(ishead) if val]
1358
1359
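# The fallback above in miniature: mark every rev as a potential head, then
# clear the mark on every rev that appears as somebody's parent.

def toy_headrevs(parents, count):
    ishead = [True] * count
    for r in range(count):
        for p in parents[r]:
            ishead[p] = False
    return [r for r in range(count) if ishead[r]]

parents = {0: [], 1: [0], 2: [0], 3: [1]}
assert toy_headrevs(parents, 4) == [2, 3]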
1359 def heads(self, start=None, stop=None):
1360 def heads(self, start=None, stop=None):
1360 """return the list of all nodes that have no children
1361 """return the list of all nodes that have no children
1361
1362
1362 if start is specified, only heads that are descendants of
1363 if start is specified, only heads that are descendants of
1363 start will be returned
1364 start will be returned
1364 if stop is specified, it will consider all the revs from stop
1365 if stop is specified, it will consider all the revs from stop
1365 as if they had no children
1366 as if they had no children
1366 """
1367 """
1367 if start is None and stop is None:
1368 if start is None and stop is None:
1368 if not len(self):
1369 if not len(self):
1369 return [self.nullid]
1370 return [self.nullid]
1370 return [self.node(r) for r in self.headrevs()]
1371 return [self.node(r) for r in self.headrevs()]
1371
1372
1372 if start is None:
1373 if start is None:
1373 start = nullrev
1374 start = nullrev
1374 else:
1375 else:
1375 start = self.rev(start)
1376 start = self.rev(start)
1376
1377
1377 stoprevs = {self.rev(n) for n in stop or []}
1378 stoprevs = {self.rev(n) for n in stop or []}
1378
1379
1379 revs = dagop.headrevssubset(
1380 revs = dagop.headrevssubset(
1380 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1381 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1381 )
1382 )
1382
1383
1383 return [self.node(rev) for rev in revs]
1384 return [self.node(rev) for rev in revs]
1384
1385
1385 def children(self, node):
1386 def children(self, node):
1386 """find the children of a given node"""
1387 """find the children of a given node"""
1387 c = []
1388 c = []
1388 p = self.rev(node)
1389 p = self.rev(node)
1389 for r in self.revs(start=p + 1):
1390 for r in self.revs(start=p + 1):
1390 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1391 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1391 if prevs:
1392 if prevs:
1392 for pr in prevs:
1393 for pr in prevs:
1393 if pr == p:
1394 if pr == p:
1394 c.append(self.node(r))
1395 c.append(self.node(r))
1395 elif p == nullrev:
1396 elif p == nullrev:
1396 c.append(self.node(r))
1397 c.append(self.node(r))
1397 return c
1398 return c
1398
1399
1399 def commonancestorsheads(self, a, b):
1400 def commonancestorsheads(self, a, b):
1400 """calculate all the heads of the common ancestors of nodes a and b"""
1401 """calculate all the heads of the common ancestors of nodes a and b"""
1401 a, b = self.rev(a), self.rev(b)
1402 a, b = self.rev(a), self.rev(b)
1402 ancs = self._commonancestorsheads(a, b)
1403 ancs = self._commonancestorsheads(a, b)
1403 return pycompat.maplist(self.node, ancs)
1404 return pycompat.maplist(self.node, ancs)
1404
1405
1405 def _commonancestorsheads(self, *revs):
1406 def _commonancestorsheads(self, *revs):
1406 """calculate all the heads of the common ancestors of revs"""
1407 """calculate all the heads of the common ancestors of revs"""
1407 try:
1408 try:
1408 ancs = self.index.commonancestorsheads(*revs)
1409 ancs = self.index.commonancestorsheads(*revs)
1409 except (AttributeError, OverflowError): # C implementation failed
1410 except (AttributeError, OverflowError): # C implementation failed
1410 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1411 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1411 return ancs
1412 return ancs
1412
1413
1413 def isancestor(self, a, b):
1414 def isancestor(self, a, b):
1414 """return True if node a is an ancestor of node b
1415 """return True if node a is an ancestor of node b
1415
1416
1416 A revision is considered an ancestor of itself."""
1417 A revision is considered an ancestor of itself."""
1417 a, b = self.rev(a), self.rev(b)
1418 a, b = self.rev(a), self.rev(b)
1418 return self.isancestorrev(a, b)
1419 return self.isancestorrev(a, b)
1419
1420
1420 def isancestorrev(self, a, b):
1421 def isancestorrev(self, a, b):
1421 """return True if revision a is an ancestor of revision b
1422 """return True if revision a is an ancestor of revision b
1422
1423
1423 A revision is considered an ancestor of itself.
1424 A revision is considered an ancestor of itself.
1424
1425
1425 The implementation of this is trivial but the use of
1426 The implementation of this is trivial but the use of
1426 reachableroots is not."""
1427 reachableroots is not."""
1427 if a == nullrev:
1428 if a == nullrev:
1428 return True
1429 return True
1429 elif a == b:
1430 elif a == b:
1430 return True
1431 return True
1431 elif a > b:
1432 elif a > b:
1432 return False
1433 return False
1433 return bool(self.reachableroots(a, [b], [a], includepath=False))
1434 return bool(self.reachableroots(a, [b], [a], includepath=False))
1434
1435
1435 def reachableroots(self, minroot, heads, roots, includepath=False):
1436 def reachableroots(self, minroot, heads, roots, includepath=False):
1436 """return (heads(::(<roots> and <roots>::<heads>)))
1437 """return (heads(::(<roots> and <roots>::<heads>)))
1437
1438
1438 If includepath is True, return (<roots>::<heads>)."""
1439 If includepath is True, return (<roots>::<heads>)."""
1439 try:
1440 try:
1440 return self.index.reachableroots2(
1441 return self.index.reachableroots2(
1441 minroot, heads, roots, includepath
1442 minroot, heads, roots, includepath
1442 )
1443 )
1443 except AttributeError:
1444 except AttributeError:
1444 return dagop._reachablerootspure(
1445 return dagop._reachablerootspure(
1445 self.parentrevs, minroot, roots, heads, includepath
1446 self.parentrevs, minroot, roots, heads, includepath
1446 )
1447 )
1447
1448
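# isancestorrev() above leans on revision numbering: an ancestor always has a
# smaller rev than its descendants, so a > b answers False with no graph
# walk. A brute-force stand-in for the reachableroots call, on a toy DAG:

def toy_isancestorrev(parents, a, b):
    if a == -1 or a == b:
        return True
    if a > b:
        return False  # revs are topologically ordered
    stack, seen = [b], set()
    while stack:
        r = stack.pop()
        if r == a:
            return True
        for p in parents[r]:
            if p >= a and p not in seen:  # prune below a, like minroot
                seen.add(p)
                stack.append(p)
    return False

parents = {0: [], 1: [0], 2: [0], 3: [1, 2]}
assert toy_isancestorrev(parents, 0, 3)
assert not toy_isancestorrev(parents, 2, 1)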
1448 def ancestor(self, a, b):
1449 def ancestor(self, a, b):
1449 """calculate the "best" common ancestor of nodes a and b"""
1450 """calculate the "best" common ancestor of nodes a and b"""
1450
1451
1451 a, b = self.rev(a), self.rev(b)
1452 a, b = self.rev(a), self.rev(b)
1452 try:
1453 try:
1453 ancs = self.index.ancestors(a, b)
1454 ancs = self.index.ancestors(a, b)
1454 except (AttributeError, OverflowError):
1455 except (AttributeError, OverflowError):
1455 ancs = ancestor.ancestors(self.parentrevs, a, b)
1456 ancs = ancestor.ancestors(self.parentrevs, a, b)
1456 if ancs:
1457 if ancs:
1457 # choose a consistent winner when there's a tie
1458 # choose a consistent winner when there's a tie
1458 return min(map(self.node, ancs))
1459 return min(map(self.node, ancs))
1459 return self.nullid
1460 return self.nullid
1460
1461
1461 def _match(self, id):
1462 def _match(self, id):
1462 if isinstance(id, int):
1463 if isinstance(id, int):
1463 # rev
1464 # rev
1464 return self.node(id)
1465 return self.node(id)
1465 if len(id) == self.nodeconstants.nodelen:
1466 if len(id) == self.nodeconstants.nodelen:
1466 # possibly a binary node
1467 # possibly a binary node
1467 # odds of a binary node being all hex in ASCII are 1 in 10**25
1468 # odds of a binary node being all hex in ASCII are 1 in 10**25
1468 try:
1469 try:
1469 node = id
1470 node = id
1470 self.rev(node) # quick search the index
1471 self.rev(node) # quick search the index
1471 return node
1472 return node
1472 except error.LookupError:
1473 except error.LookupError:
1473 pass # may be partial hex id
1474 pass # may be partial hex id
1474 try:
1475 try:
1475 # str(rev)
1476 # str(rev)
1476 rev = int(id)
1477 rev = int(id)
1477 if b"%d" % rev != id:
1478 if b"%d" % rev != id:
1478 raise ValueError
1479 raise ValueError
1479 if rev < 0:
1480 if rev < 0:
1480 rev = len(self) + rev
1481 rev = len(self) + rev
1481 if rev < 0 or rev >= len(self):
1482 if rev < 0 or rev >= len(self):
1482 raise ValueError
1483 raise ValueError
1483 return self.node(rev)
1484 return self.node(rev)
1484 except (ValueError, OverflowError):
1485 except (ValueError, OverflowError):
1485 pass
1486 pass
1486 if len(id) == 2 * self.nodeconstants.nodelen:
1487 if len(id) == 2 * self.nodeconstants.nodelen:
1487 try:
1488 try:
1488 # a full hex nodeid?
1489 # a full hex nodeid?
1489 node = bin(id)
1490 node = bin(id)
1490 self.rev(node)
1491 self.rev(node)
1491 return node
1492 return node
1492 except (TypeError, error.LookupError):
1493 except (TypeError, error.LookupError):
1493 pass
1494 pass
1494
1495
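# The middle branch of _match() above accepts str(rev) byte strings,
# including negative indexes counted from the end. That parsing rule alone,
# as a sketch (``length`` standing in for len(self)):

def toy_match_strrev(id, length):
    try:
        rev = int(id)
        if b"%d" % rev != id:
            return None  # e.g. b"07": not a canonical str(rev)
        if rev < 0:
            rev = length + rev
        if rev < 0 or rev >= length:
            return None
        return rev
    except (ValueError, OverflowError):
        return None

assert toy_match_strrev(b"2", length=5) == 2
assert toy_match_strrev(b"-1", length=5) == 4
assert toy_match_strrev(b"07", length=5) is None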
1495 def _partialmatch(self, id):
1496 def _partialmatch(self, id):
1496 # we don't care about wdirfilenodeids as they should always be full hashes
1497 # we don't care about wdirfilenodeids as they should always be full hashes
1497 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1498 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1498 try:
1499 try:
1499 partial = self.index.partialmatch(id)
1500 partial = self.index.partialmatch(id)
1500 if partial and self.hasnode(partial):
1501 if partial and self.hasnode(partial):
1501 if maybewdir:
1502 if maybewdir:
1502 # single 'ff...' match in radix tree, ambiguous with wdir
1503 # single 'ff...' match in radix tree, ambiguous with wdir
1503 raise error.RevlogError
1504 raise error.RevlogError
1504 return partial
1505 return partial
1505 if maybewdir:
1506 if maybewdir:
1506 # no 'ff...' match in radix tree, wdir identified
1507 # no 'ff...' match in radix tree, wdir identified
1507 raise error.WdirUnsupported
1508 raise error.WdirUnsupported
1508 return None
1509 return None
1509 except error.RevlogError:
1510 except error.RevlogError:
1510 # parsers.c radix tree lookup gave multiple matches
1511 # parsers.c radix tree lookup gave multiple matches
1511 # fast path: for unfiltered changelog, radix tree is accurate
1512 # fast path: for unfiltered changelog, radix tree is accurate
1512 if not getattr(self, 'filteredrevs', None):
1513 if not getattr(self, 'filteredrevs', None):
1513 raise error.AmbiguousPrefixLookupError(
1514 raise error.AmbiguousPrefixLookupError(
1514 id, self.display_id, _(b'ambiguous identifier')
1515 id, self.display_id, _(b'ambiguous identifier')
1515 )
1516 )
1516 # fall through to slow path that filters hidden revisions
1517 # fall through to slow path that filters hidden revisions
1517 except (AttributeError, ValueError):
1518 except (AttributeError, ValueError):
1518 # we are pure python, or key was too short to search radix tree
1519 # we are pure python, or key was too short to search radix tree
1519 pass
1520 pass
1520
1521
1521 if id in self._pcache:
1522 if id in self._pcache:
1522 return self._pcache[id]
1523 return self._pcache[id]
1523
1524
1524 if len(id) <= 40:
1525 if len(id) <= 40:
1525 try:
1526 try:
1526 # hex(node)[:...]
1527 # hex(node)[:...]
1527 l = len(id) // 2 # grab an even number of digits
1528 l = len(id) // 2 # grab an even number of digits
1528 prefix = bin(id[: l * 2])
1529 prefix = bin(id[: l * 2])
1529 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1530 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1530 nl = [
1531 nl = [
1531 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1532 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1532 ]
1533 ]
1533 if self.nodeconstants.nullhex.startswith(id):
1534 if self.nodeconstants.nullhex.startswith(id):
1534 nl.append(self.nullid)
1535 nl.append(self.nullid)
1535 if len(nl) > 0:
1536 if len(nl) > 0:
1536 if len(nl) == 1 and not maybewdir:
1537 if len(nl) == 1 and not maybewdir:
1537 self._pcache[id] = nl[0]
1538 self._pcache[id] = nl[0]
1538 return nl[0]
1539 return nl[0]
1539 raise error.AmbiguousPrefixLookupError(
1540 raise error.AmbiguousPrefixLookupError(
1540 id, self.display_id, _(b'ambiguous identifier')
1541 id, self.display_id, _(b'ambiguous identifier')
1541 )
1542 )
1542 if maybewdir:
1543 if maybewdir:
1543 raise error.WdirUnsupported
1544 raise error.WdirUnsupported
1544 return None
1545 return None
1545 except TypeError:
1546 except TypeError:
1546 pass
1547 pass
1547
1548
1548 def lookup(self, id):
1549 def lookup(self, id):
1549 """locate a node based on:
1550 """locate a node based on:
1550 - revision number or str(revision number)
1551 - revision number or str(revision number)
1551 - nodeid or subset of hex nodeid
1552 - nodeid or subset of hex nodeid
1552 """
1553 """
1553 n = self._match(id)
1554 n = self._match(id)
1554 if n is not None:
1555 if n is not None:
1555 return n
1556 return n
1556 n = self._partialmatch(id)
1557 n = self._partialmatch(id)
1557 if n:
1558 if n:
1558 return n
1559 return n
1559
1560
1560 raise error.LookupError(id, self.display_id, _(b'no match found'))
1561 raise error.LookupError(id, self.display_id, _(b'no match found'))
1561
1562
1562 def shortest(self, node, minlength=1):
1563 def shortest(self, node, minlength=1):
1563 """Find the shortest unambiguous prefix that matches node."""
1564 """Find the shortest unambiguous prefix that matches node."""
1564
1565
1565 def isvalid(prefix):
1566 def isvalid(prefix):
1566 try:
1567 try:
1567 matchednode = self._partialmatch(prefix)
1568 matchednode = self._partialmatch(prefix)
1568 except error.AmbiguousPrefixLookupError:
1569 except error.AmbiguousPrefixLookupError:
1569 return False
1570 return False
1570 except error.WdirUnsupported:
1571 except error.WdirUnsupported:
1571 # single 'ff...' match
1572 # single 'ff...' match
1572 return True
1573 return True
1573 if matchednode is None:
1574 if matchednode is None:
1574 raise error.LookupError(node, self.display_id, _(b'no node'))
1575 raise error.LookupError(node, self.display_id, _(b'no node'))
1575 return True
1576 return True
1576
1577
1577 def maybewdir(prefix):
1578 def maybewdir(prefix):
1578 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1579 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1579
1580
1580 hexnode = hex(node)
1581 hexnode = hex(node)
1581
1582
1582 def disambiguate(hexnode, minlength):
1583 def disambiguate(hexnode, minlength):
1583 """Disambiguate against wdirid."""
1584 """Disambiguate against wdirid."""
1584 for length in range(minlength, len(hexnode) + 1):
1585 for length in range(minlength, len(hexnode) + 1):
1585 prefix = hexnode[:length]
1586 prefix = hexnode[:length]
1586 if not maybewdir(prefix):
1587 if not maybewdir(prefix):
1587 return prefix
1588 return prefix
1588
1589
1589 if not getattr(self, 'filteredrevs', None):
1590 if not getattr(self, 'filteredrevs', None):
1590 try:
1591 try:
1591 length = max(self.index.shortest(node), minlength)
1592 length = max(self.index.shortest(node), minlength)
1592 return disambiguate(hexnode, length)
1593 return disambiguate(hexnode, length)
1593 except error.RevlogError:
1594 except error.RevlogError:
1594 if node != self.nodeconstants.wdirid:
1595 if node != self.nodeconstants.wdirid:
1595 raise error.LookupError(
1596 raise error.LookupError(
1596 node, self.display_id, _(b'no node')
1597 node, self.display_id, _(b'no node')
1597 )
1598 )
1598 except AttributeError:
1599 except AttributeError:
1599 # Fall through to pure code
1600 # Fall through to pure code
1600 pass
1601 pass
1601
1602
1602 if node == self.nodeconstants.wdirid:
1603 if node == self.nodeconstants.wdirid:
1603 for length in range(minlength, len(hexnode) + 1):
1604 for length in range(minlength, len(hexnode) + 1):
1604 prefix = hexnode[:length]
1605 prefix = hexnode[:length]
1605 if isvalid(prefix):
1606 if isvalid(prefix):
1606 return prefix
1607 return prefix
1607
1608
1608 for length in range(minlength, len(hexnode) + 1):
1609 for length in range(minlength, len(hexnode) + 1):
1609 prefix = hexnode[:length]
1610 prefix = hexnode[:length]
1610 if isvalid(prefix):
1611 if isvalid(prefix):
1611 return disambiguate(hexnode, length)
1612 return disambiguate(hexnode, length)
1612
1613
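# disambiguate() above only needs to dodge the all-'f' working-directory id.
# A sketch, assuming 'ffff...' is the reserved wdir prefix (here on str
# rather than bytes for brevity):

def toy_disambiguate(hexnode, minlength):
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        if not all(c == 'f' for c in prefix):
            return prefix

assert toy_disambiguate('fab4', 1) == 'fa'  # 'f' alone could still be wdir
assert toy_disambiguate('ab12', 1) == 'a'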
1613 def cmp(self, node, text):
1614 def cmp(self, node, text):
1614 """compare text with a given file revision
1615 """compare text with a given file revision
1615
1616
1616 returns True if text is different than what is stored.
1617 returns True if text is different than what is stored.
1617 """
1618 """
1618 p1, p2 = self.parents(node)
1619 p1, p2 = self.parents(node)
1619 return storageutil.hashrevisionsha1(text, p1, p2) != node
1620 return storageutil.hashrevisionsha1(text, p1, p2) != node
1620
1621
1621 def _cachesegment(self, offset, data):
1622 def _cachesegment(self, offset, data):
1622 """Add a segment to the revlog cache.
1623 """Add a segment to the revlog cache.
1623
1624
1624 Accepts an absolute offset and the data that is at that location.
1625 Accepts an absolute offset and the data that is at that location.
1625 """
1626 """
1626 o, d = self._chunkcache
1627 o, d = self._chunkcache
1627 # try to add to existing cache
1628 # try to add to existing cache
1628 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1629 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1629 self._chunkcache = o, d + data
1630 self._chunkcache = o, d + data
1630 else:
1631 else:
1631 self._chunkcache = offset, data
1632 self._chunkcache = offset, data
1632
1633
1633 def _readsegment(self, offset, length, df=None):
1634 def _readsegment(self, offset, length, df=None):
1634 """Load a segment of raw data from the revlog.
1635 """Load a segment of raw data from the revlog.
1635
1636
1636 Accepts an absolute offset, length to read, and an optional existing
1637 Accepts an absolute offset, length to read, and an optional existing
1637 file handle to read from.
1638 file handle to read from.
1638
1639
1639 If an existing file handle is passed, it will be seeked and the
1640 If an existing file handle is passed, it will be seeked and the
1640 original seek position will NOT be restored.
1641 original seek position will NOT be restored.
1641
1642
1642 Returns a str or buffer of raw byte data.
1643 Returns a str or buffer of raw byte data.
1643
1644
1644 Raises if the requested number of bytes could not be read.
1645 Raises if the requested number of bytes could not be read.
1645 """
1646 """
1646 # Cache data both forward and backward around the requested
1647 # Cache data both forward and backward around the requested
1647 # data, in a fixed size window. This helps speed up operations
1648 # data, in a fixed size window. This helps speed up operations
1648 # involving reading the revlog backwards.
1649 # involving reading the revlog backwards.
1649 cachesize = self._chunkcachesize
1650 cachesize = self._chunkcachesize
1650 realoffset = offset & ~(cachesize - 1)
1651 realoffset = offset & ~(cachesize - 1)
1651 reallength = (
1652 reallength = (
1652 (offset + length + cachesize) & ~(cachesize - 1)
1653 (offset + length + cachesize) & ~(cachesize - 1)
1653 ) - realoffset
1654 ) - realoffset
1654 with self._datareadfp(df) as df:
1655 with self._datareadfp(df) as df:
1655 df.seek(realoffset)
1656 df.seek(realoffset)
1656 d = df.read(reallength)
1657 d = df.read(reallength)
1657
1658
1658 self._cachesegment(realoffset, d)
1659 self._cachesegment(realoffset, d)
1659 if offset != realoffset or reallength != length:
1660 if offset != realoffset or reallength != length:
1660 startoffset = offset - realoffset
1661 startoffset = offset - realoffset
1661 if len(d) - startoffset < length:
1662 if len(d) - startoffset < length:
1662 raise error.RevlogError(
1663 raise error.RevlogError(
1663 _(
1664 _(
1664 b'partial read of revlog %s; expected %d bytes from '
1665 b'partial read of revlog %s; expected %d bytes from '
1665 b'offset %d, got %d'
1666 b'offset %d, got %d'
1666 )
1667 )
1667 % (
1668 % (
1668 self._indexfile if self._inline else self._datafile,
1669 self._indexfile if self._inline else self._datafile,
1669 length,
1670 length,
1670 offset,
1671 offset,
1671 len(d) - startoffset,
1672 len(d) - startoffset,
1672 )
1673 )
1673 )
1674 )
1674
1675
1675 return util.buffer(d, startoffset, length)
1676 return util.buffer(d, startoffset, length)
1676
1677
1677 if len(d) < length:
1678 if len(d) < length:
1678 raise error.RevlogError(
1679 raise error.RevlogError(
1679 _(
1680 _(
1680 b'partial read of revlog %s; expected %d bytes from offset '
1681 b'partial read of revlog %s; expected %d bytes from offset '
1681 b'%d, got %d'
1682 b'%d, got %d'
1682 )
1683 )
1683 % (
1684 % (
1684 self._indexfile if self._inline else self._datafile,
1685 self._indexfile if self._inline else self._datafile,
1685 length,
1686 length,
1686 offset,
1687 offset,
1687 len(d),
1688 len(d),
1688 )
1689 )
1689 )
1690 )
1690
1691
1691 return d
1692 return d
1692
1693
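# The window arithmetic above, isolated: reads are widened to cache-size
# aligned boundaries so neighbouring requests, in either direction, hit the
# cache. The masking assumes ``cachesize`` is a power of two.

def toy_window(offset, length, cachesize=65536):
    realoffset = offset & ~(cachesize - 1)
    reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
    return realoffset, reallength

# a 1000-byte read straddling the first 64k boundary pulls in two full blocks
assert toy_window(65000, 1000) == (0, 131072)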
1693 def _getsegment(self, offset, length, df=None):
1694 def _getsegment(self, offset, length, df=None):
1694 """Obtain a segment of raw data from the revlog.
1695 """Obtain a segment of raw data from the revlog.
1695
1696
1696 Accepts an absolute offset, length of bytes to obtain, and an
1697 Accepts an absolute offset, length of bytes to obtain, and an
1697 optional file handle to the already-opened revlog. If the file
1698 optional file handle to the already-opened revlog. If the file
1698 handle is used, its original seek position will not be preserved.
1699 handle is used, its original seek position will not be preserved.
1699
1700
1700 Requests for data may be returned from a cache.
1701 Requests for data may be returned from a cache.
1701
1702
1702 Returns a str or a buffer instance of raw byte data.
1703 Returns a str or a buffer instance of raw byte data.
1703 """
1704 """
1704 o, d = self._chunkcache
1705 o, d = self._chunkcache
1705 l = len(d)
1706 l = len(d)
1706
1707
1707 # is it in the cache?
1708 # is it in the cache?
1708 cachestart = offset - o
1709 cachestart = offset - o
1709 cacheend = cachestart + length
1710 cacheend = cachestart + length
1710 if cachestart >= 0 and cacheend <= l:
1711 if cachestart >= 0 and cacheend <= l:
1711 if cachestart == 0 and cacheend == l:
1712 if cachestart == 0 and cacheend == l:
1712 return d # avoid a copy
1713 return d # avoid a copy
1713 return util.buffer(d, cachestart, cacheend - cachestart)
1714 return util.buffer(d, cachestart, cacheend - cachestart)
1714
1715
1715 return self._readsegment(offset, length, df=df)
1716 return self._readsegment(offset, length, df=df)
1716
1717
1717 def _getsegmentforrevs(self, startrev, endrev, df=None):
1718 def _getsegmentforrevs(self, startrev, endrev, df=None):
1718 """Obtain a segment of raw data corresponding to a range of revisions.
1719 """Obtain a segment of raw data corresponding to a range of revisions.
1719
1720
1720 Accepts the start and end revisions and an optional already-open
1721 Accepts the start and end revisions and an optional already-open
1721 file handle to be used for reading. If the file handle is used, its
1722 file handle to be used for reading. If the file handle is used, its
1722 seek position will not be preserved.
1723 seek position will not be preserved.
1723
1724
1724 Requests for data may be satisfied by a cache.
1725 Requests for data may be satisfied by a cache.
1725
1726
1726 Returns a 2-tuple of (offset, data) for the requested range of
1727 Returns a 2-tuple of (offset, data) for the requested range of
1727 revisions. Offset is the integer offset from the beginning of the
1728 revisions. Offset is the integer offset from the beginning of the
1728 revlog and data is a str or buffer of the raw byte data.
1729 revlog and data is a str or buffer of the raw byte data.
1729
1730
1730 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1731 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1731 to determine where each revision's data begins and ends.
1732 to determine where each revision's data begins and ends.
1732 """
1733 """
1733 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1734 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1734 # (functions are expensive).
1735 # (functions are expensive).
1735 index = self.index
1736 index = self.index
1736 istart = index[startrev]
1737 istart = index[startrev]
1737 start = int(istart[0] >> 16)
1738 start = int(istart[0] >> 16)
1738 if startrev == endrev:
1739 if startrev == endrev:
1739 end = start + istart[1]
1740 end = start + istart[1]
1740 else:
1741 else:
1741 iend = index[endrev]
1742 iend = index[endrev]
1742 end = int(iend[0] >> 16) + iend[1]
1743 end = int(iend[0] >> 16) + iend[1]
1743
1744
1744 if self._inline:
1745 if self._inline:
1745 start += (startrev + 1) * self.index.entry_size
1746 start += (startrev + 1) * self.index.entry_size
1746 end += (endrev + 1) * self.index.entry_size
1747 end += (endrev + 1) * self.index.entry_size
1747 length = end - start
1748 length = end - start
1748
1749
1749 return start, self._getsegment(start, length, df=df)
1750 return start, self._getsegment(start, length, df=df)
1750
1751
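# For inline revlogs, index entries and data chunks are interleaved in one
# file, so a data offset must be shifted past the (rev + 1) index entries
# that precede it. A sketch with an assumed fixed entry size:

def toy_inline_start(data_start, rev, entry_size=64):
    return data_start + (rev + 1) * entry_size

# the chunk recorded at data offset 0 for rev 0 really sits after one entry
assert toy_inline_start(0, 0) == 64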
1751 def _chunk(self, rev, df=None):
1752 def _chunk(self, rev, df=None):
1752 """Obtain a single decompressed chunk for a revision.
1753 """Obtain a single decompressed chunk for a revision.
1753
1754
1754 Accepts an integer revision and an optional already-open file handle
1755 Accepts an integer revision and an optional already-open file handle
1755 to be used for reading. If used, the seek position of the file will not
1756 to be used for reading. If used, the seek position of the file will not
1756 be preserved.
1757 be preserved.
1757
1758
1758 Returns a str holding uncompressed data for the requested revision.
1759 Returns a str holding uncompressed data for the requested revision.
1759 """
1760 """
1760 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1761 compression_mode = self.index[rev][10]
1762 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1763 if compression_mode == COMP_MODE_PLAIN:
1764 return data
1765 elif compression_mode == COMP_MODE_INLINE:
1766 return self.decompress(data)
1767 else:
1768 msg = b'unknown compression mode %d'
1769 msg %= compression_mode
1770 raise error.RevlogError(msg)
1761
1771
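# The new dispatch above, in isolation: slot 10 of the index entry (as read
# by this changeset) says whether the on-disk chunk is stored raw or must be
# run through decompress(). The constant values and the use of zlib are
# assumptions of this sketch, standing in for revlogutils.constants and the
# revlog's own decompressor.

import zlib

COMP_MODE_PLAIN = 0   # assumed value
COMP_MODE_INLINE = 2  # assumed value

def toy_chunk(data, compression_mode, decompress=zlib.decompress):
    if compression_mode == COMP_MODE_PLAIN:
        return data  # no decompression round-trip at all
    elif compression_mode == COMP_MODE_INLINE:
        return decompress(data)
    raise ValueError(b'unknown compression mode %d' % compression_mode)

raw = b'x' * 32
assert toy_chunk(raw, COMP_MODE_PLAIN) == raw
assert toy_chunk(zlib.compress(raw), COMP_MODE_INLINE) == raw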
    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
+                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
-                ladd(decomp(c))
+                if comp_mode == COMP_MODE_PLAIN:
+                    ladd(c)
+                elif comp_mode == COMP_MODE_INLINE:
+                    ladd(decomp(c))
+                else:
+                    msg = b'unknown compression mode %d'
+                    msg %= comp_mode
+                    raise error.RevlogError(msg)

        return l

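The sparse-read path above fetches one contiguous segment per slice and carves each revision's chunk out of it with zero-copy buffers, instead of issuing one read per revision. A rough model of why that batching wins, assuming a plain file handle and hypothetical (offset, length) metadata:

    def read_batch(fh, items):
        # items: sorted (offset, length) pairs. One read() covers the whole
        # span; memoryview slices hand out each chunk without copying.
        first = items[0][0]
        last_off, last_len = items[-1]
        fh.seek(first)
        data = fh.read(last_off + last_len - first)
        view = memoryview(data)
        return [view[o - first : o - first + n] for o, n in items]
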
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

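Under sparse-revlog a revision can also be an intermediate snapshot: its delta base is neither of its parents, and the base chain itself bottoms out in snapshots. A compact model of that recursion over a toy index of (deltabase, p1, p2) tuples (all names illustrative):

    NULLREV = -1

    def issnapshot(index, rev):
        # index: list of (deltabase, p1, p2) per revision.
        if rev == NULLREV:
            return True
        base, p1, p2 = index[rev]
        if base == rev or base == NULLREV:
            return True        # full snapshot, stored without a delta base
        if base in (p1, p2):
            return False       # ordinary delta against a parent
        return issnapshot(index, base)  # intermediate snapshot chain

    # rev 2 deltas against rev 0, which is not one of its parents:
    index = [(0, NULLREV, NULLREV), (0, 0, NULLREV), (0, 1, NULLREV)]
    assert issnapshot(index, 2)
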
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.hassidedata:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

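`_rawtext` thus walks the delta chain down to a base text and folds every delta on top of it. A toy model of that reconstruction, with deltas reduced to plain callables (real revlog deltas are binary patches applied by `mdiff.patches`):

    def rebuild(base_text, deltas, apply_delta):
        # Fold each delta, oldest first, onto the reconstructed text.
        text = base_text
        for delta in deltas:
            text = apply_delta(text, delta)
        return text

    # toy usage: "deltas" that merely append a suffix
    assert rebuild(b'a', [b'b', b'c'], lambda t, d: t + d) == b'abc'
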
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        segment = self._getsegment(sidedata_offset, sidedata_size)
        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return an uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

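The default node hash combines the sorted parent nodes with the revision text, which is why `checkhash` can recompute it from `(text, p1, p2)` alone. A minimal sketch of that scheme (SHA-1 over sorted parents, then the text; a simplified reading of `storageutil.hashrevisionsha1`):

    import hashlib

    def hashrevision(text, p1, p2):
        # Parents are sorted first, so the result is symmetric in p1 and p2.
        a, b = sorted([p1, p2])
        s = hashlib.sha1(a)
        s.update(b)
        s.update(text)
        return s.digest()
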
    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if troffset <= self.start(r):
                        trindex = r
                new_dfh.flush()

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()
                # the temp file replaces the real index when we exit the
                # context manager

            tr.replace(self._indexfile, trindex * self.index.entry_size)
            nodemaputil.setup_persistent_nodemap(tr, self)
            self._chunkclear()

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh)
                new_dfh = None
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            r = len(self)
            dsize = 0
            if r:
                dsize = self.end(r - 1)
            dfh = None
            if not self._inline:
                try:
                    dfh = self._datafp(b"r+")
                    if self._docket is None:
                        dfh.seek(0, os.SEEK_END)
                    else:
                        dfh.seek(self._docket.data_end, os.SEEK_SET)
                except IOError as inst:
                    if inst.errno != errno.ENOENT:
                        raise
                    dfh = self._datafp(b"w+")
                transaction.add(self._datafile, dsize)
            try:
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                try:
                    self._writinghandles = (ifh, dfh)
                    try:
                        yield
                        if self._docket is not None:
                            self._write_docket(transaction)
                    finally:
                        self._writinghandles = None
                finally:
                    ifh.close()
            finally:
                if dfh is not None:
                    dfh.close()

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic.

        We could also imagine using the same transaction logic for all
        revlogs, since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such a
        case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

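`compress` returns a (header, data) pair whose concatenation is what lands on disk: an empty header when the payload already identifies itself (compressor output, or raw data starting with b'\0'), and b'u' to flag text stored uncompressed. A sketch of that convention with zlib standing in for the configured engine (the explicit size check is an assumption here, not the real engine contract):

    import zlib

    def compress_chunk(data):
        if not data:
            return b'', data
        compressed = zlib.compress(data)
        if len(compressed) < len(data):
            return b'', compressed  # zlib output self-identifies: it starts with b'x'
        if data[:1] == b'\0':
            return b'', data        # b'\0' is itself a recognized chunk header
        return b'u', data           # otherwise mark the text as stored uncompressed
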
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )

        return compressor.decompress(data)

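The first byte of a chunk therefore acts as a tiny routing table. A self-contained sketch of the same dispatch, covering only the three built-in headers (real revlogs can additionally route unknown headers to pluggable engines):

    import zlib

    def decompress_chunk(data):
        if not data:
            return data
        t = data[:1]
        if t == b'x':        # zlib stream; zlib output begins with 0x78 ('x')
            return zlib.decompress(data)
        if t == b'\0':       # raw chunk stored verbatim, header byte included
            return data
        if t == b'u':        # uncompressed text behind an explicit marker byte
            return data[1:]
        raise ValueError('unknown compression type %r' % t)
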
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh = self._writinghandles
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

+        compression_mode = COMP_MODE_INLINE
+        if self._docket is not None:
+            h, d = deltainfo.data
+            if not h and not d:
+                # no data to store at all... declare it uncompressed
+                compression_mode = COMP_MODE_PLAIN
+            elif not h and d[0:1] == b'\0':
+                compression_mode = COMP_MODE_PLAIN
+            elif h == b'u':
+                # we have a more efficient way to declare uncompressed
+                h = b''
+                compression_mode = COMP_MODE_PLAIN
+                deltainfo = deltautil.drop_u_compression(deltainfo)
+
        if sidedata and self.hassidedata:
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no
            # different from ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
-            COMP_MODE_INLINE,
+            compression_mode,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

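On the write side, the stored mode is derived from the (header, data) pair the delta computer produced: anything that needs no header is recorded as plain, so readers can skip decompression entirely. A distilled model of that decision; `PLAIN`, `INLINE`, and `pick_mode` are illustrative stand-ins, not the revlog API:

    PLAIN, INLINE = 0, 1  # illustrative stand-ins for the COMP_MODE_* constants

    def pick_mode(header, data):
        # Empty payloads and b'\0'-prefixed raw chunks need no header at all.
        if not header and (not data or data[:1] == b'\0'):
            return PLAIN, header, data
        # A b'u' header means "uncompressed"; recording PLAIN in the index is
        # cheaper than storing that extra byte with every chunk.
        if header == b'u':
            return PLAIN, b'', data
        # Everything else keeps its self-identifying header inline.
        return INLINE, header, data
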
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(self, transaction, entry, data, link, offset, sidedata):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = self._writinghandles[0].tell()
            self._docket.data_end = self._writinghandles[1].tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

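`_writeentry` also shows the two on-disk layouts side by side: split revlogs append the index record to `.i` and the chunk to `.d`, while inline revlogs interleave each record with its data inside `.i`. A toy serializer for the two layouts (purely illustrative, not the real binary encoding):

    def append_record(index_buf, data_buf, entry, chunk, inline):
        # entry: fixed-size index record; chunk: (header, payload) bytes pair.
        if inline:
            # inline: the record and its data live side by side in the index file
            index_buf += entry + chunk[0] + chunk[1]
        else:
            index_buf += entry
            data_buf += chunk[0] + chunk[1]
        return index_buf, data_buf

    i, d = append_record(b'', b'', b'<entry>', (b'u', b'text'), inline=False)
    assert (i, d) == (b'<entry>', b'utext')
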
    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

2702 def getstrippoint(self, minlink):
2734 def getstrippoint(self, minlink):
2703 """find the minimum rev that must be stripped to strip the linkrev
2735 """find the minimum rev that must be stripped to strip the linkrev
2704
2736
2705 Returns a tuple containing the minimum rev and a set of all revs that
2737 Returns a tuple containing the minimum rev and a set of all revs that
2706 have linkrevs that will be broken by this strip.
2738 have linkrevs that will be broken by this strip.
2707 """
2739 """
2708 return storageutil.resolvestripinfo(
2740 return storageutil.resolvestripinfo(
2709 minlink,
2741 minlink,
2710 len(self) - 1,
2742 len(self) - 1,
2711 self.headrevs(),
2743 self.headrevs(),
2712 self.linkrev,
2744 self.linkrev,
2713 self.parentrevs,
2745 self.parentrevs,
2714 )
2746 )
2715
2747
2716 def strip(self, minlink, transaction):
2748 def strip(self, minlink, transaction):
2717 """truncate the revlog on the first revision with a linkrev >= minlink
2749 """truncate the revlog on the first revision with a linkrev >= minlink
2718
2750
2719 This function is called when we're stripping revision minlink and
2751 This function is called when we're stripping revision minlink and
2720 its descendants from the repository.
2752 its descendants from the repository.
2721
2753
2722 We have to remove all revisions with linkrev >= minlink, because
2754 We have to remove all revisions with linkrev >= minlink, because
2723 the equivalent changelog revisions will be renumbered after the
2755 the equivalent changelog revisions will be renumbered after the
2724 strip.
2756 strip.
2725
2757
2726 So we truncate the revlog on the first of these revisions, and
2758 So we truncate the revlog on the first of these revisions, and
2727 trust that the caller has saved the revisions that shouldn't be
2759 trust that the caller has saved the revisions that shouldn't be
2728 removed and that it'll re-add them after this truncation.
2760 removed and that it'll re-add them after this truncation.
2729 """
2761 """
2730 if len(self) == 0:
2762 if len(self) == 0:
2731 return
2763 return
2732
2764
2733 rev, _ = self.getstrippoint(minlink)
2765 rev, _ = self.getstrippoint(minlink)
2734 if rev == len(self):
2766 if rev == len(self):
2735 return
2767 return
2736
2768
2737 # first truncate the files on disk
2769 # first truncate the files on disk
2738 data_end = self.start(rev)
2770 data_end = self.start(rev)
2739 if not self._inline:
2771 if not self._inline:
2740 transaction.add(self._datafile, data_end)
2772 transaction.add(self._datafile, data_end)
2741 end = rev * self.index.entry_size
2773 end = rev * self.index.entry_size
2742 else:
2774 else:
2743 end = data_end + (rev * self.index.entry_size)
2775 end = data_end + (rev * self.index.entry_size)
2744
2776
2745 transaction.add(self._indexfile, end)
2777 transaction.add(self._indexfile, end)
2746 if self._docket is not None:
2778 if self._docket is not None:
2747 # XXX we could leverage the docket while stripping. However it is
2779 # XXX we could leverage the docket while stripping. However it is
2748 # not powerful enough at the time of this comment
2780 # not powerful enough at the time of this comment
2749 self._docket.index_end = end
2781 self._docket.index_end = end
2750 self._docket.data_end = data_end
2782 self._docket.data_end = data_end
2751 self._docket.write(transaction, stripping=True)
2783 self._docket.write(transaction, stripping=True)
2752
2784
2753 # then reset internal state in memory to forget those revisions
2785 # then reset internal state in memory to forget those revisions
2754 self._revisioncache = None
2786 self._revisioncache = None
2755 self._chaininfocache = util.lrucachedict(500)
2787 self._chaininfocache = util.lrucachedict(500)
2756 self._chunkclear()
2788 self._chunkclear()
2757
2789
2758 del self.index[rev:-1]
2790 del self.index[rev:-1]
2759
2791
2760 def checksize(self):
2792 def checksize(self):
2761 """Check size of index and data files
2793 """Check size of index and data files
2762
2794
2763 return a (dd, di) tuple.
2795 return a (dd, di) tuple.
2764 - dd: extra bytes for the "data" file
2796 - dd: extra bytes for the "data" file
2765 - di: extra bytes for the "index" file
2797 - di: extra bytes for the "index" file
2766
2798
2767 A healthy revlog will return (0, 0).
2799 A healthy revlog will return (0, 0).
2768 """
2800 """
2769 expected = 0
2801 expected = 0
2770 if len(self):
2802 if len(self):
2771 expected = max(0, self.end(len(self) - 1))
2803 expected = max(0, self.end(len(self) - 1))
2772
2804
2773 try:
2805 try:
2774 with self._datafp() as f:
2806 with self._datafp() as f:
2775 f.seek(0, io.SEEK_END)
2807 f.seek(0, io.SEEK_END)
2776 actual = f.tell()
2808 actual = f.tell()
2777 dd = actual - expected
2809 dd = actual - expected
2778 except IOError as inst:
2810 except IOError as inst:
2779 if inst.errno != errno.ENOENT:
2811 if inst.errno != errno.ENOENT:
2780 raise
2812 raise
2781 dd = 0
2813 dd = 0
2782
2814
2783 try:
2815 try:
2784 f = self.opener(self._indexfile)
2816 f = self.opener(self._indexfile)
2785 f.seek(0, io.SEEK_END)
2817 f.seek(0, io.SEEK_END)
2786 actual = f.tell()
2818 actual = f.tell()
2787 f.close()
2819 f.close()
2788 s = self.index.entry_size
2820 s = self.index.entry_size
2789 i = max(0, actual // s)
2821 i = max(0, actual // s)
2790 di = actual - (i * s)
2822 di = actual - (i * s)
2791 if self._inline:
2823 if self._inline:
2792 databytes = 0
2824 databytes = 0
2793 for r in self:
2825 for r in self:
2794 databytes += max(0, self.length(r))
2826 databytes += max(0, self.length(r))
2795 dd = 0
2827 dd = 0
2796 di = actual - len(self) * s - databytes
2828 di = actual - len(self) * s - databytes
2797 except IOError as inst:
2829 except IOError as inst:
2798 if inst.errno != errno.ENOENT:
2830 if inst.errno != errno.ENOENT:
2799 raise
2831 raise
2800 di = 0
2832 di = 0
2801
2833
2802 return (dd, di)
2834 return (dd, di)
2803
2835
2804 def files(self):
2836 def files(self):
2805 res = [self._indexfile]
2837 res = [self._indexfile]
2806 if not self._inline:
2838 if not self._inline:
2807 res.append(self._datafile)
2839 res.append(self._datafile)
2808 return res
2840 return res
2809
2841
2810 def emitrevisions(
2842 def emitrevisions(
2811 self,
2843 self,
2812 nodes,
2844 nodes,
2813 nodesorder=None,
2845 nodesorder=None,
2814 revisiondata=False,
2846 revisiondata=False,
2815 assumehaveparentrevisions=False,
2847 assumehaveparentrevisions=False,
2816 deltamode=repository.CG_DELTAMODE_STD,
2848 deltamode=repository.CG_DELTAMODE_STD,
2817 sidedata_helpers=None,
2849 sidedata_helpers=None,
2818 ):
2850 ):
2819 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2851 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2820 raise error.ProgrammingError(
2852 raise error.ProgrammingError(
2821 b'unhandled value for nodesorder: %s' % nodesorder
2853 b'unhandled value for nodesorder: %s' % nodesorder
2822 )
2854 )
2823
2855
2824 if nodesorder is None and not self._generaldelta:
2856 if nodesorder is None and not self._generaldelta:
2825 nodesorder = b'storage'
2857 nodesorder = b'storage'
2826
2858
2827 if (
2859 if (
2828 not self._storedeltachains
2860 not self._storedeltachains
2829 and deltamode != repository.CG_DELTAMODE_PREV
2861 and deltamode != repository.CG_DELTAMODE_PREV
2830 ):
2862 ):
2831 deltamode = repository.CG_DELTAMODE_FULL
2863 deltamode = repository.CG_DELTAMODE_FULL
2832
2864
2833 return storageutil.emitrevisions(
2865 return storageutil.emitrevisions(
2834 self,
2866 self,
2835 nodes,
2867 nodes,
2836 nodesorder,
2868 nodesorder,
2837 revlogrevisiondelta,
2869 revlogrevisiondelta,
2838 deltaparentfn=self.deltaparent,
2870 deltaparentfn=self.deltaparent,
2839 candeltafn=self.candelta,
2871 candeltafn=self.candelta,
2840 rawsizefn=self.rawsize,
2872 rawsizefn=self.rawsize,
2841 revdifffn=self.revdiff,
2873 revdifffn=self.revdiff,
2842 flagsfn=self.flags,
2874 flagsfn=self.flags,
2843 deltamode=deltamode,
2875 deltamode=deltamode,
2844 revisiondata=revisiondata,
2876 revisiondata=revisiondata,
2845 assumehaveparentrevisions=assumehaveparentrevisions,
2877 assumehaveparentrevisions=assumehaveparentrevisions,
2846 sidedata_helpers=sidedata_helpers,
2878 sidedata_helpers=sidedata_helpers,
2847 )
2879 )
2848
2880
2849 DELTAREUSEALWAYS = b'always'
2881 DELTAREUSEALWAYS = b'always'
2850 DELTAREUSESAMEREVS = b'samerevs'
2882 DELTAREUSESAMEREVS = b'samerevs'
2851 DELTAREUSENEVER = b'never'
2883 DELTAREUSENEVER = b'never'
2852
2884
2853 DELTAREUSEFULLADD = b'fulladd'
2885 DELTAREUSEFULLADD = b'fulladd'
2854
2886
2855 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2887 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2856
2888
2857 def clone(
2889 def clone(
2858 self,
2890 self,
2859 tr,
2891 tr,
2860 destrevlog,
2892 destrevlog,
2861 addrevisioncb=None,
2893 addrevisioncb=None,
2862 deltareuse=DELTAREUSESAMEREVS,
2894 deltareuse=DELTAREUSESAMEREVS,
2863 forcedeltabothparents=None,
2895 forcedeltabothparents=None,
2864 sidedata_helpers=None,
2896 sidedata_helpers=None,
2865 ):
2897 ):
2866 """Copy this revlog to another, possibly with format changes.
2898 """Copy this revlog to another, possibly with format changes.
2867
2899
2868 The destination revlog will contain the same revisions and nodes.
2900 The destination revlog will contain the same revisions and nodes.
2869 However, it may not be bit-for-bit identical due to e.g. delta encoding
2901 However, it may not be bit-for-bit identical due to e.g. delta encoding
2870 differences.
2902 differences.
2871
2903
2872 The ``deltareuse`` argument controls how deltas from the existing revlog
2904 The ``deltareuse`` argument controls how deltas from the existing revlog
2873 are preserved in the destination revlog. The argument can have the
2905 are preserved in the destination revlog. The argument can have the
2874 following values:
2906 following values:
2875
2907
2876 DELTAREUSEALWAYS
2908 DELTAREUSEALWAYS
2877 Deltas will always be reused (if possible), even if the destination
2909 Deltas will always be reused (if possible), even if the destination
2878 revlog would not select the same revisions for the delta. This is the
2910 revlog would not select the same revisions for the delta. This is the
2879 fastest mode of operation.
2911 fastest mode of operation.
2880 DELTAREUSESAMEREVS
2912 DELTAREUSESAMEREVS
2881 Deltas will be reused if the destination revlog would pick the same
2913 Deltas will be reused if the destination revlog would pick the same
2882 revisions for the delta. This mode strikes a balance between speed
2914 revisions for the delta. This mode strikes a balance between speed
2883 and optimization.
2915 and optimization.
2884 DELTAREUSENEVER
2916 DELTAREUSENEVER
2885 Deltas will never be reused. This is the slowest mode of execution.
2917 Deltas will never be reused. This is the slowest mode of execution.
2886 This mode can be used to recompute deltas (e.g. if the diff/delta
2918 This mode can be used to recompute deltas (e.g. if the diff/delta
2887 algorithm changes).
2919 algorithm changes).
2888 DELTAREUSEFULLADD
2920 DELTAREUSEFULLADD
2889 Revisions will be re-added as if they were new content. This is
2921 Revisions will be re-added as if they were new content. This is
2890 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2922 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2891 e.g. large file detection and handling.
2923 e.g. large file detection and handling.
2892
2924
2893 Delta computation can be slow, so the choice of delta reuse policy can
2925 Delta computation can be slow, so the choice of delta reuse policy can
2894 significantly affect run time.
2926 significantly affect run time.
2895
2927
2896 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2928 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2897 two extremes. Deltas will be reused if they are appropriate. But if the
2929 two extremes. Deltas will be reused if they are appropriate. But if the
2898 delta could choose a better revision, it will do so. This means if you
2930 delta could choose a better revision, it will do so. This means if you
2899 are converting a non-generaldelta revlog to a generaldelta revlog,
2931 are converting a non-generaldelta revlog to a generaldelta revlog,
2900 deltas will be recomputed if the delta's parent isn't a parent of the
2932 deltas will be recomputed if the delta's parent isn't a parent of the
2901 revision.
2933 revision.
2902
2934
2903 In addition to the delta policy, the ``forcedeltabothparents``
2935 In addition to the delta policy, the ``forcedeltabothparents``
2904 argument controls whether to force computing deltas against both parents
2936 argument controls whether to force computing deltas against both parents
2905 for merges. When unset, the destination revlog's existing setting is kept.
2937 for merges. When unset, the destination revlog's existing setting is kept.
2906
2938
2907 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2939 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2908 `sidedata_helpers`.
2940 `sidedata_helpers`.
2909 """
2941 """
2910 if deltareuse not in self.DELTAREUSEALL:
2942 if deltareuse not in self.DELTAREUSEALL:
2911 raise ValueError(
2943 raise ValueError(
2912 _(b'value for deltareuse invalid: %s') % deltareuse
2944 _(b'value for deltareuse invalid: %s') % deltareuse
2913 )
2945 )
2914
2946
2915 if len(destrevlog):
2947 if len(destrevlog):
2916 raise ValueError(_(b'destination revlog is not empty'))
2948 raise ValueError(_(b'destination revlog is not empty'))
2917
2949
2918 if getattr(self, 'filteredrevs', None):
2950 if getattr(self, 'filteredrevs', None):
2919 raise ValueError(_(b'source revlog has filtered revisions'))
2951 raise ValueError(_(b'source revlog has filtered revisions'))
2920 if getattr(destrevlog, 'filteredrevs', None):
2952 if getattr(destrevlog, 'filteredrevs', None):
2921 raise ValueError(_(b'destination revlog has filtered revisions'))
2953 raise ValueError(_(b'destination revlog has filtered revisions'))
2922
2954
2923 # lazydelta and lazydeltabase control whether to reuse a cached delta,
2955 # lazydelta and lazydeltabase control whether to reuse a cached delta,
2924 # if possible.
2956 # if possible.
2925 oldlazydelta = destrevlog._lazydelta
2957 oldlazydelta = destrevlog._lazydelta
2926 oldlazydeltabase = destrevlog._lazydeltabase
2958 oldlazydeltabase = destrevlog._lazydeltabase
2927 oldamd = destrevlog._deltabothparents
2959 oldamd = destrevlog._deltabothparents
2928
2960
2929 try:
2961 try:
2930 if deltareuse == self.DELTAREUSEALWAYS:
2962 if deltareuse == self.DELTAREUSEALWAYS:
2931 destrevlog._lazydeltabase = True
2963 destrevlog._lazydeltabase = True
2932 destrevlog._lazydelta = True
2964 destrevlog._lazydelta = True
2933 elif deltareuse == self.DELTAREUSESAMEREVS:
2965 elif deltareuse == self.DELTAREUSESAMEREVS:
2934 destrevlog._lazydeltabase = False
2966 destrevlog._lazydeltabase = False
2935 destrevlog._lazydelta = True
2967 destrevlog._lazydelta = True
2936 elif deltareuse == self.DELTAREUSENEVER:
2968 elif deltareuse == self.DELTAREUSENEVER:
2937 destrevlog._lazydeltabase = False
2969 destrevlog._lazydeltabase = False
2938 destrevlog._lazydelta = False
2970 destrevlog._lazydelta = False
2939
2971
2940 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2972 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2941
2973
2942 self._clone(
2974 self._clone(
2943 tr,
2975 tr,
2944 destrevlog,
2976 destrevlog,
2945 addrevisioncb,
2977 addrevisioncb,
2946 deltareuse,
2978 deltareuse,
2947 forcedeltabothparents,
2979 forcedeltabothparents,
2948 sidedata_helpers,
2980 sidedata_helpers,
2949 )
2981 )
2950
2982
2951 finally:
2983 finally:
2952 destrevlog._lazydelta = oldlazydelta
2984 destrevlog._lazydelta = oldlazydelta
2953 destrevlog._lazydeltabase = oldlazydeltabase
2985 destrevlog._lazydeltabase = oldlazydeltabase
2954 destrevlog._deltabothparents = oldamd
2986 destrevlog._deltabothparents = oldamd
2955
2987
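As a usage sketch (the caller names `repo`, `src`, and `dest` are hypothetical, not part of this file), forcing every delta to be recomputed during a copy might look like:

    with repo.transaction(b'clone') as tr:
        src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)

which matches the DELTAREUSENEVER behaviour documented above: the slowest mode, but deltas come out recomputed with the destination's current algorithm.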
2956 def _clone(
2988 def _clone(
2957 self,
2989 self,
2958 tr,
2990 tr,
2959 destrevlog,
2991 destrevlog,
2960 addrevisioncb,
2992 addrevisioncb,
2961 deltareuse,
2993 deltareuse,
2962 forcedeltabothparents,
2994 forcedeltabothparents,
2963 sidedata_helpers,
2995 sidedata_helpers,
2964 ):
2996 ):
2965 """perform the core duty of `revlog.clone` after parameter processing"""
2997 """perform the core duty of `revlog.clone` after parameter processing"""
2966 deltacomputer = deltautil.deltacomputer(destrevlog)
2998 deltacomputer = deltautil.deltacomputer(destrevlog)
2967 index = self.index
2999 index = self.index
2968 for rev in self:
3000 for rev in self:
2969 entry = index[rev]
3001 entry = index[rev]
2970
3002
2971 # Some classes override linkrev to take filtered revs into
3003 # Some classes override linkrev to take filtered revs into
2972 # account. Use raw entry from index.
3004 # account. Use raw entry from index.
2973 flags = entry[0] & 0xFFFF
3005 flags = entry[0] & 0xFFFF
2974 linkrev = entry[4]
3006 linkrev = entry[4]
2975 p1 = index[entry[5]][7]
3007 p1 = index[entry[5]][7]
2976 p2 = index[entry[6]][7]
3008 p2 = index[entry[6]][7]
2977 node = entry[7]
3009 node = entry[7]
2978
3010
2979 # (Possibly) reuse the delta from the revlog if allowed and
3011 # (Possibly) reuse the delta from the revlog if allowed and
2980 # the revlog chunk is a delta.
3012 # the revlog chunk is a delta.
2981 cachedelta = None
3013 cachedelta = None
2982 rawtext = None
3014 rawtext = None
2983 if deltareuse == self.DELTAREUSEFULLADD:
3015 if deltareuse == self.DELTAREUSEFULLADD:
2984 text, sidedata = self._revisiondata(rev)
3016 text, sidedata = self._revisiondata(rev)
2985
3017
2986 if sidedata_helpers is not None:
3018 if sidedata_helpers is not None:
2987 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3019 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2988 self, sidedata_helpers, sidedata, rev
3020 self, sidedata_helpers, sidedata, rev
2989 )
3021 )
2990 flags = flags | new_flags[0] & ~new_flags[1]
3022 flags = flags | new_flags[0] & ~new_flags[1]
2991
3023
2992 destrevlog.addrevision(
3024 destrevlog.addrevision(
2993 text,
3025 text,
2994 tr,
3026 tr,
2995 linkrev,
3027 linkrev,
2996 p1,
3028 p1,
2997 p2,
3029 p2,
2998 cachedelta=cachedelta,
3030 cachedelta=cachedelta,
2999 node=node,
3031 node=node,
3000 flags=flags,
3032 flags=flags,
3001 deltacomputer=deltacomputer,
3033 deltacomputer=deltacomputer,
3002 sidedata=sidedata,
3034 sidedata=sidedata,
3003 )
3035 )
3004 else:
3036 else:
3005 if destrevlog._lazydelta:
3037 if destrevlog._lazydelta:
3006 dp = self.deltaparent(rev)
3038 dp = self.deltaparent(rev)
3007 if dp != nullrev:
3039 if dp != nullrev:
3008 cachedelta = (dp, bytes(self._chunk(rev)))
3040 cachedelta = (dp, bytes(self._chunk(rev)))
3009
3041
3010 sidedata = None
3042 sidedata = None
3011 if not cachedelta:
3043 if not cachedelta:
3012 rawtext, sidedata = self._revisiondata(rev)
3044 rawtext, sidedata = self._revisiondata(rev)
3013 if sidedata is None:
3045 if sidedata is None:
3014 sidedata = self.sidedata(rev)
3046 sidedata = self.sidedata(rev)
3015
3047
3016 if sidedata_helpers is not None:
3048 if sidedata_helpers is not None:
3017 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3049 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3018 self, sidedata_helpers, sidedata, rev
3050 self, sidedata_helpers, sidedata, rev
3019 )
3051 )
3020 flags = flags | new_flags[0] & ~new_flags[1]
3052 flags = flags | new_flags[0] & ~new_flags[1]
3021
3053
3022 with destrevlog._writing(tr):
3054 with destrevlog._writing(tr):
3023 destrevlog._addrevision(
3055 destrevlog._addrevision(
3024 node,
3056 node,
3025 rawtext,
3057 rawtext,
3026 tr,
3058 tr,
3027 linkrev,
3059 linkrev,
3028 p1,
3060 p1,
3029 p2,
3061 p2,
3030 flags,
3062 flags,
3031 cachedelta,
3063 cachedelta,
3032 deltacomputer=deltacomputer,
3064 deltacomputer=deltacomputer,
3033 sidedata=sidedata,
3065 sidedata=sidedata,
3034 )
3066 )
3035
3067
3036 if addrevisioncb:
3068 if addrevisioncb:
3037 addrevisioncb(self, rev, node)
3069 addrevisioncb(self, rev, node)
3038
3070
3039 def censorrevision(self, tr, censornode, tombstone=b''):
3071 def censorrevision(self, tr, censornode, tombstone=b''):
3040 if self._format_version == REVLOGV0:
3072 if self._format_version == REVLOGV0:
3041 raise error.RevlogError(
3073 raise error.RevlogError(
3042 _(b'cannot censor with version %d revlogs')
3074 _(b'cannot censor with version %d revlogs')
3043 % self._format_version
3075 % self._format_version
3044 )
3076 )
3045
3077
3046 censorrev = self.rev(censornode)
3078 censorrev = self.rev(censornode)
3047 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
3079 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
3048
3080
3049 if len(tombstone) > self.rawsize(censorrev):
3081 if len(tombstone) > self.rawsize(censorrev):
3050 raise error.Abort(
3082 raise error.Abort(
3051 _(b'censor tombstone must be no longer than censored data')
3083 _(b'censor tombstone must be no longer than censored data')
3052 )
3084 )
3053
3085
3054 # Rewriting the revlog in place is hard. Our strategy for censoring is
3086 # Rewriting the revlog in place is hard. Our strategy for censoring is
3055 # to create a new revlog, copy all revisions to it, then replace the
3087 # to create a new revlog, copy all revisions to it, then replace the
3056 # revlogs on transaction close.
3088 # revlogs on transaction close.
3057 #
3089 #
3058 # This is a bit dangerous. We could easily have a mismatch of state.
3090 # This is a bit dangerous. We could easily have a mismatch of state.
3059 newrl = revlog(
3091 newrl = revlog(
3060 self.opener,
3092 self.opener,
3061 target=self.target,
3093 target=self.target,
3062 radix=self.radix,
3094 radix=self.radix,
3063 postfix=b'tmpcensored',
3095 postfix=b'tmpcensored',
3064 censorable=True,
3096 censorable=True,
3065 )
3097 )
3066 newrl._format_version = self._format_version
3098 newrl._format_version = self._format_version
3067 newrl._format_flags = self._format_flags
3099 newrl._format_flags = self._format_flags
3068 newrl._generaldelta = self._generaldelta
3100 newrl._generaldelta = self._generaldelta
3069 newrl._parse_index = self._parse_index
3101 newrl._parse_index = self._parse_index
3070
3102
3071 for rev in self.revs():
3103 for rev in self.revs():
3072 node = self.node(rev)
3104 node = self.node(rev)
3073 p1, p2 = self.parents(node)
3105 p1, p2 = self.parents(node)
3074
3106
3075 if rev == censorrev:
3107 if rev == censorrev:
3076 newrl.addrawrevision(
3108 newrl.addrawrevision(
3077 tombstone,
3109 tombstone,
3078 tr,
3110 tr,
3079 self.linkrev(censorrev),
3111 self.linkrev(censorrev),
3080 p1,
3112 p1,
3081 p2,
3113 p2,
3082 censornode,
3114 censornode,
3083 REVIDX_ISCENSORED,
3115 REVIDX_ISCENSORED,
3084 )
3116 )
3085
3117
3086 if newrl.deltaparent(rev) != nullrev:
3118 if newrl.deltaparent(rev) != nullrev:
3087 raise error.Abort(
3119 raise error.Abort(
3088 _(
3120 _(
3089 b'censored revision stored as delta; '
3121 b'censored revision stored as delta; '
3090 b'cannot censor'
3122 b'cannot censor'
3091 ),
3123 ),
3092 hint=_(
3124 hint=_(
3093 b'censoring of revlogs is not '
3125 b'censoring of revlogs is not '
3094 b'fully implemented; please report '
3126 b'fully implemented; please report '
3095 b'this bug'
3127 b'this bug'
3096 ),
3128 ),
3097 )
3129 )
3098 continue
3130 continue
3099
3131
3100 if self.iscensored(rev):
3132 if self.iscensored(rev):
3101 if self.deltaparent(rev) != nullrev:
3133 if self.deltaparent(rev) != nullrev:
3102 raise error.Abort(
3134 raise error.Abort(
3103 _(
3135 _(
3104 b'cannot censor due to censored '
3136 b'cannot censor due to censored '
3105 b'revision having delta stored'
3137 b'revision having delta stored'
3106 )
3138 )
3107 )
3139 )
3108 rawtext = self._chunk(rev)
3140 rawtext = self._chunk(rev)
3109 else:
3141 else:
3110 rawtext = self.rawdata(rev)
3142 rawtext = self.rawdata(rev)
3111
3143
3112 newrl.addrawrevision(
3144 newrl.addrawrevision(
3113 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3145 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3114 )
3146 )
3115
3147
3116 tr.addbackup(self._indexfile, location=b'store')
3148 tr.addbackup(self._indexfile, location=b'store')
3117 if not self._inline:
3149 if not self._inline:
3118 tr.addbackup(self._datafile, location=b'store')
3150 tr.addbackup(self._datafile, location=b'store')
3119
3151
3120 self.opener.rename(newrl._indexfile, self._indexfile)
3152 self.opener.rename(newrl._indexfile, self._indexfile)
3121 if not self._inline:
3153 if not self._inline:
3122 self.opener.rename(newrl._datafile, self._datafile)
3154 self.opener.rename(newrl._datafile, self._datafile)
3123
3155
3124 self.clearcaches()
3156 self.clearcaches()
3125 self._loadindex()
3157 self._loadindex()
3126
3158
3127 def verifyintegrity(self, state):
3159 def verifyintegrity(self, state):
3128 """Verifies the integrity of the revlog.
3160 """Verifies the integrity of the revlog.
3129
3161
3130 Yields ``revlogproblem`` instances describing problems that are
3162 Yields ``revlogproblem`` instances describing problems that are
3131 found.
3163 found.
3132 """
3164 """
3133 dd, di = self.checksize()
3165 dd, di = self.checksize()
3134 if dd:
3166 if dd:
3135 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3167 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3136 if di:
3168 if di:
3137 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3169 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3138
3170
3139 version = self._format_version
3171 version = self._format_version
3140
3172
3141 # The verifier tells us what version revlog we should be.
3173 # The verifier tells us what version revlog we should be.
3142 if version != state[b'expectedversion']:
3174 if version != state[b'expectedversion']:
3143 yield revlogproblem(
3175 yield revlogproblem(
3144 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3176 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3145 % (self.display_id, version, state[b'expectedversion'])
3177 % (self.display_id, version, state[b'expectedversion'])
3146 )
3178 )
3147
3179
3148 state[b'skipread'] = set()
3180 state[b'skipread'] = set()
3149 state[b'safe_renamed'] = set()
3181 state[b'safe_renamed'] = set()
3150
3182
3151 for rev in self:
3183 for rev in self:
3152 node = self.node(rev)
3184 node = self.node(rev)
3153
3185
3154 # Verify contents. 4 cases to care about:
3186 # Verify contents. 4 cases to care about:
3155 #
3187 #
3156 # common: the most common case
3188 # common: the most common case
3157 # rename: with a rename
3189 # rename: with a rename
3158 # meta: file content starts with b'\1\n', the metadata
3190 # meta: file content starts with b'\1\n', the metadata
3159 # header defined in filelog.py, but without a rename
3191 # header defined in filelog.py, but without a rename
3160 # ext: content stored externally
3192 # ext: content stored externally
3161 #
3193 #
3162 # More formally, their differences are shown below:
3194 # More formally, their differences are shown below:
3163 #
3195 #
3164 # | common | rename | meta | ext
3196 # | common | rename | meta | ext
3165 # -------------------------------------------------------
3197 # -------------------------------------------------------
3166 # flags() | 0 | 0 | 0 | not 0
3198 # flags() | 0 | 0 | 0 | not 0
3167 # renamed() | False | True | False | ?
3199 # renamed() | False | True | False | ?
3168 # rawtext[0:2]=='\1\n'| False | True | True | ?
3200 # rawtext[0:2]=='\1\n'| False | True | True | ?
3169 #
3201 #
3170 # "rawtext" means the raw text stored in revlog data, which
3202 # "rawtext" means the raw text stored in revlog data, which
3171 # could be retrieved by "rawdata(rev)". "text"
3203 # could be retrieved by "rawdata(rev)". "text"
3172 # mentioned below is "revision(rev)".
3204 # mentioned below is "revision(rev)".
3173 #
3205 #
3174 # There are 3 different lengths stored physically:
3206 # There are 3 different lengths stored physically:
3175 # 1. L1: rawsize, stored in revlog index
3207 # 1. L1: rawsize, stored in revlog index
3176 # 2. L2: len(rawtext), stored in revlog data
3208 # 2. L2: len(rawtext), stored in revlog data
3177 # 3. L3: len(text), stored in revlog data if flags==0, or
3209 # 3. L3: len(text), stored in revlog data if flags==0, or
3178 # possibly somewhere else if flags!=0
3210 # possibly somewhere else if flags!=0
3179 #
3211 #
3180 # L1 should be equal to L2. L3 could be different from them.
3212 # L1 should be equal to L2. L3 could be different from them.
3181 # "text" may or may not affect commit hash depending on flag
3213 # "text" may or may not affect commit hash depending on flag
3182 # processors (see flagutil.addflagprocessor).
3214 # processors (see flagutil.addflagprocessor).
3183 #
3215 #
3184 # | common | rename | meta | ext
3216 # | common | rename | meta | ext
3185 # -------------------------------------------------
3217 # -------------------------------------------------
3186 # rawsize() | L1 | L1 | L1 | L1
3218 # rawsize() | L1 | L1 | L1 | L1
3187 # size() | L1 | L2-LM | L1(*) | L1 (?)
3219 # size() | L1 | L2-LM | L1(*) | L1 (?)
3188 # len(rawtext) | L2 | L2 | L2 | L2
3220 # len(rawtext) | L2 | L2 | L2 | L2
3189 # len(text) | L2 | L2 | L2 | L3
3221 # len(text) | L2 | L2 | L2 | L3
3190 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3222 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3191 #
3223 #
3192 # LM: length of metadata, depending on rawtext
3224 # LM: length of metadata, depending on rawtext
3193 # (*): not ideal, see comment in filelog.size
3225 # (*): not ideal, see comment in filelog.size
3194 # (?): could be "- len(meta)" if the resolved content has
3226 # (?): could be "- len(meta)" if the resolved content has
3195 # rename metadata
3227 # rename metadata
3196 #
3228 #
3197 # Checks needed to be done:
3229 # Checks needed to be done:
3198 # 1. length check: L1 == L2, in all cases.
3230 # 1. length check: L1 == L2, in all cases.
3199 # 2. hash check: depending on flag processor, we may need to
3231 # 2. hash check: depending on flag processor, we may need to
3200 # use either "text" (external), or "rawtext" (in revlog).
3232 # use either "text" (external), or "rawtext" (in revlog).
3201
3233
3202 try:
3234 try:
3203 skipflags = state.get(b'skipflags', 0)
3235 skipflags = state.get(b'skipflags', 0)
3204 if skipflags:
3236 if skipflags:
3205 skipflags &= self.flags(rev)
3237 skipflags &= self.flags(rev)
3206
3238
3207 _verify_revision(self, skipflags, state, node)
3239 _verify_revision(self, skipflags, state, node)
3208
3240
3209 l1 = self.rawsize(rev)
3241 l1 = self.rawsize(rev)
3210 l2 = len(self.rawdata(node))
3242 l2 = len(self.rawdata(node))
3211
3243
3212 if l1 != l2:
3244 if l1 != l2:
3213 yield revlogproblem(
3245 yield revlogproblem(
3214 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3246 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3215 node=node,
3247 node=node,
3216 )
3248 )
3217
3249
3218 except error.CensoredNodeError:
3250 except error.CensoredNodeError:
3219 if state[b'erroroncensored']:
3251 if state[b'erroroncensored']:
3220 yield revlogproblem(
3252 yield revlogproblem(
3221 error=_(b'censored file data'), node=node
3253 error=_(b'censored file data'), node=node
3222 )
3254 )
3223 state[b'skipread'].add(node)
3255 state[b'skipread'].add(node)
3224 except Exception as e:
3256 except Exception as e:
3225 yield revlogproblem(
3257 yield revlogproblem(
3226 error=_(b'unpacking %s: %s')
3258 error=_(b'unpacking %s: %s')
3227 % (short(node), stringutil.forcebytestr(e)),
3259 % (short(node), stringutil.forcebytestr(e)),
3228 node=node,
3260 node=node,
3229 )
3261 )
3230 state[b'skipread'].add(node)
3262 state[b'skipread'].add(node)
3231
3263
3232 def storageinfo(
3264 def storageinfo(
3233 self,
3265 self,
3234 exclusivefiles=False,
3266 exclusivefiles=False,
3235 sharedfiles=False,
3267 sharedfiles=False,
3236 revisionscount=False,
3268 revisionscount=False,
3237 trackedsize=False,
3269 trackedsize=False,
3238 storedsize=False,
3270 storedsize=False,
3239 ):
3271 ):
3240 d = {}
3272 d = {}
3241
3273
3242 if exclusivefiles:
3274 if exclusivefiles:
3243 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3275 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3244 if not self._inline:
3276 if not self._inline:
3245 d[b'exclusivefiles'].append((self.opener, self._datafile))
3277 d[b'exclusivefiles'].append((self.opener, self._datafile))
3246
3278
3247 if sharedfiles:
3279 if sharedfiles:
3248 d[b'sharedfiles'] = []
3280 d[b'sharedfiles'] = []
3249
3281
3250 if revisionscount:
3282 if revisionscount:
3251 d[b'revisionscount'] = len(self)
3283 d[b'revisionscount'] = len(self)
3252
3284
3253 if trackedsize:
3285 if trackedsize:
3254 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3286 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3255
3287
3256 if storedsize:
3288 if storedsize:
3257 d[b'storedsize'] = sum(
3289 d[b'storedsize'] = sum(
3258 self.opener.stat(path).st_size for path in self.files()
3290 self.opener.stat(path).st_size for path in self.files()
3259 )
3291 )
3260
3292
3261 return d
3293 return d
3262
3294
3263 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3295 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3264 if not self.hassidedata:
3296 if not self.hassidedata:
3265 return
3297 return
3266 # revlog formats with sidedata support do not support inline
3298 # revlog formats with sidedata support do not support inline
3267 assert not self._inline
3299 assert not self._inline
3268 if not helpers[1] and not helpers[2]:
3300 if not helpers[1] and not helpers[2]:
3269 # Nothing to generate or remove
3301 # Nothing to generate or remove
3270 return
3302 return
3271
3303
3272 new_entries = []
3304 new_entries = []
3273 # append the new sidedata
3305 # append the new sidedata
3274 with self._writing(transaction):
3306 with self._writing(transaction):
3275 ifh, dfh = self._writinghandles
3307 ifh, dfh = self._writinghandles
3276 if self._docket is not None:
3308 if self._docket is not None:
3277 dfh.seek(self._docket.data_end, os.SEEK_SET)
3309 dfh.seek(self._docket.data_end, os.SEEK_SET)
3278 else:
3310 else:
3279 dfh.seek(0, os.SEEK_END)
3311 dfh.seek(0, os.SEEK_END)
3280
3312
3281 current_offset = dfh.tell()
3313 current_offset = dfh.tell()
3282 for rev in range(startrev, endrev + 1):
3314 for rev in range(startrev, endrev + 1):
3283 entry = self.index[rev]
3315 entry = self.index[rev]
3284 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3316 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3285 store=self,
3317 store=self,
3286 sidedata_helpers=helpers,
3318 sidedata_helpers=helpers,
3287 sidedata={},
3319 sidedata={},
3288 rev=rev,
3320 rev=rev,
3289 )
3321 )
3290
3322
3291 serialized_sidedata = sidedatautil.serialize_sidedata(
3323 serialized_sidedata = sidedatautil.serialize_sidedata(
3292 new_sidedata
3324 new_sidedata
3293 )
3325 )
3294 if entry[8] != 0 or entry[9] != 0:
3326 if entry[8] != 0 or entry[9] != 0:
3295 # rewriting entries that already have sidedata is not
3327 # rewriting entries that already have sidedata is not
3296 # supported yet, because it introduces garbage data in the
3328 # supported yet, because it introduces garbage data in the
3297 # revlog.
3329 # revlog.
3298 msg = b"rewriting existing sidedata is not supported yet"
3330 msg = b"rewriting existing sidedata is not supported yet"
3299 raise error.Abort(msg)
3331 raise error.Abort(msg)
3300
3332
3301 # Apply (potential) flags to add and to remove after running
3333 # Apply (potential) flags to add and to remove after running
3302 # the sidedata helpers
3334 # the sidedata helpers
3303 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3335 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3304 entry_update = (
3336 entry_update = (
3305 current_offset,
3337 current_offset,
3306 len(serialized_sidedata),
3338 len(serialized_sidedata),
3307 new_offset_flags,
3339 new_offset_flags,
3308 )
3340 )
3309
3341
3310 # the sidedata computation might have moved the file cursors around
3342 # the sidedata computation might have moved the file cursors around
3311 dfh.seek(current_offset, os.SEEK_SET)
3343 dfh.seek(current_offset, os.SEEK_SET)
3312 dfh.write(serialized_sidedata)
3344 dfh.write(serialized_sidedata)
3313 new_entries.append(entry_update)
3345 new_entries.append(entry_update)
3314 current_offset += len(serialized_sidedata)
3346 current_offset += len(serialized_sidedata)
3315 if self._docket is not None:
3347 if self._docket is not None:
3316 self._docket.data_end = dfh.tell()
3348 self._docket.data_end = dfh.tell()
3317
3349
3318 # rewrite the new index entries
3350 # rewrite the new index entries
3319 ifh.seek(startrev * self.index.entry_size)
3351 ifh.seek(startrev * self.index.entry_size)
3320 for i, e in enumerate(new_entries):
3352 for i, e in enumerate(new_entries):
3321 rev = startrev + i
3353 rev = startrev + i
3322 self.index.replace_sidedata_info(rev, *e)
3354 self.index.replace_sidedata_info(rev, *e)
3323 packed = self.index.entry_binary(rev)
3355 packed = self.index.entry_binary(rev)
3324 if rev == 0 and self._docket is None:
3356 if rev == 0 and self._docket is None:
3325 header = self._format_flags | self._format_version
3357 header = self._format_flags | self._format_version
3326 header = self.index.pack_header(header)
3358 header = self.index.pack_header(header)
3327 packed = header + packed
3359 packed = header + packed
3328 ifh.write(packed)
3360 ifh.write(packed)
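A small worked example (illustrative, not part of the source) of the flag-merging expression used several times above, `entry[0] | flags[0] & ~flags[1]`: in Python, `&` binds tighter than `|`, so the removal mask only filters the flags being added, not the flags already set:

    old, to_add, to_remove = 0b0101, 0b0011, 0b0001
    # bit 0 survives even though it is in to_remove, because the mask
    # applies only to to_add under this grouping
    assert old | to_add & ~to_remove == old | (to_add & ~to_remove) == 0b0111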
@@ -1,165 +1,169 b''
1 # revlogdeltas.py - constant used for revlog logic.
1 # revlogdeltas.py - constant used for revlog logic.
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import struct
12 import struct
13
13
14 from ..interfaces import repository
14 from ..interfaces import repository
15
15
16 ### Internal utility constants
16 ### Internal utility constants
17
17
18 KIND_CHANGELOG = 1001 # over 256 to not be comparable with a bytes
18 KIND_CHANGELOG = 1001 # over 256 to not be comparable with a bytes
19 KIND_MANIFESTLOG = 1002
19 KIND_MANIFESTLOG = 1002
20 KIND_FILELOG = 1003
20 KIND_FILELOG = 1003
21 KIND_OTHER = 1004
21 KIND_OTHER = 1004
22
22
23 ALL_KINDS = {
23 ALL_KINDS = {
24 KIND_CHANGELOG,
24 KIND_CHANGELOG,
25 KIND_MANIFESTLOG,
25 KIND_MANIFESTLOG,
26 KIND_FILELOG,
26 KIND_FILELOG,
27 KIND_OTHER,
27 KIND_OTHER,
28 }
28 }
29
29
30 ### main revlog header
30 ### main revlog header
31
31
32 INDEX_HEADER = struct.Struct(b">I")
32 INDEX_HEADER = struct.Struct(b">I")
33
33
34 ## revlog version
34 ## revlog version
35 REVLOGV0 = 0
35 REVLOGV0 = 0
36 REVLOGV1 = 1
36 REVLOGV1 = 1
37 # Dummy value until file format is finalized.
37 # Dummy value until file format is finalized.
38 REVLOGV2 = 0xDEAD
38 REVLOGV2 = 0xDEAD
39
39
40 ## global revlog header flags
40 ## global revlog header flags
41 # Shared across v1 and v2.
41 # Shared across v1 and v2.
42 FLAG_INLINE_DATA = 1 << 16
42 FLAG_INLINE_DATA = 1 << 16
43 # Only used by v1, implied by v2.
43 # Only used by v1, implied by v2.
44 FLAG_GENERALDELTA = 1 << 17
44 FLAG_GENERALDELTA = 1 << 17
45 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
45 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
46 REVLOG_DEFAULT_FORMAT = REVLOGV1
46 REVLOG_DEFAULT_FORMAT = REVLOGV1
47 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
47 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
48 REVLOGV0_FLAGS = 0
48 REVLOGV0_FLAGS = 0
49 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
49 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
50 REVLOGV2_FLAGS = FLAG_INLINE_DATA
50 REVLOGV2_FLAGS = FLAG_INLINE_DATA
51
51
52 ### individual entry
52 ### individual entry
53
53
54 ## index v0:
54 ## index v0:
55 # 4 bytes: offset
55 # 4 bytes: offset
56 # 4 bytes: compressed length
56 # 4 bytes: compressed length
57 # 4 bytes: base rev
57 # 4 bytes: base rev
58 # 4 bytes: link rev
58 # 4 bytes: link rev
59 # 20 bytes: parent 1 nodeid
59 # 20 bytes: parent 1 nodeid
60 # 20 bytes: parent 2 nodeid
60 # 20 bytes: parent 2 nodeid
61 # 20 bytes: nodeid
61 # 20 bytes: nodeid
62 INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s")
62 INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s")
63
63
64 ## index v1
64 ## index v1
65 # 6 bytes: offset
65 # 6 bytes: offset
66 # 2 bytes: flags
66 # 2 bytes: flags
67 # 4 bytes: compressed length
67 # 4 bytes: compressed length
68 # 4 bytes: uncompressed length
68 # 4 bytes: uncompressed length
69 # 4 bytes: base rev
69 # 4 bytes: base rev
70 # 4 bytes: link rev
70 # 4 bytes: link rev
71 # 4 bytes: parent 1 rev
71 # 4 bytes: parent 1 rev
72 # 4 bytes: parent 2 rev
72 # 4 bytes: parent 2 rev
73 # 32 bytes: nodeid
73 # 32 bytes: nodeid
74 INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")
74 INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")
75 assert INDEX_ENTRY_V1.size == 32 * 2
75 assert INDEX_ENTRY_V1.size == 32 * 2
76
76
77 # 6 bytes: offset
77 # 6 bytes: offset
78 # 2 bytes: flags
78 # 2 bytes: flags
79 # 4 bytes: compressed length
79 # 4 bytes: compressed length
80 # 4 bytes: uncompressed length
80 # 4 bytes: uncompressed length
81 # 4 bytes: base rev
81 # 4 bytes: base rev
82 # 4 bytes: link rev
82 # 4 bytes: link rev
83 # 4 bytes: parent 1 rev
83 # 4 bytes: parent 1 rev
84 # 4 bytes: parent 2 rev
84 # 4 bytes: parent 2 rev
85 # 32 bytes: nodeid
85 # 32 bytes: nodeid
86 # 8 bytes: sidedata offset
86 # 8 bytes: sidedata offset
87 # 4 bytes: sidedata compressed length
87 # 4 bytes: sidedata compressed length
88 # 1 byte: compression mode (2 lower bits are data_compression_mode)
88 # 1 byte: compression mode (2 lower bits are data_compression_mode)
89 # 19 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
89 # 19 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
90 INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQiB19x")
90 INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQiB19x")
91 assert INDEX_ENTRY_V2.size == 32 * 3, INDEX_ENTRY_V2.size
91 assert INDEX_ENTRY_V2.size == 32 * 3, INDEX_ENTRY_V2.size
92
92
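To make the 96-byte layout concrete, here is an illustrative sketch (field values invented, not part of the source) that packs a synthetic v2 entry and reads the compression-mode byte back:

    _entry = INDEX_ENTRY_V2.pack(
        (0 << 16) | 0,  # 6-byte offset and 2-byte flags share one uint64
        10,             # compressed length
        10,             # uncompressed length
        0,              # base rev
        0,              # link rev
        0,              # parent 1 rev
        -1,             # parent 2 rev (nullrev)
        b'\x00' * 20,   # nodeid (padded to 32 bytes by the 12x in the format)
        0,              # sidedata offset
        0,              # sidedata compressed length
        2,              # compression mode byte (COMP_MODE_INLINE, defined below)
    )
    assert len(_entry) == 96
    assert INDEX_ENTRY_V2.unpack(_entry)[10] == 2  # mode byte round-trips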
93 # revlog index flags
93 # revlog index flags
94
94
95 # For historical reasons, revlog's internal flags were exposed via the
95 # For historical reasons, revlog's internal flags were exposed via the
96 # wire protocol and are even exposed in parts of the storage APIs.
96 # wire protocol and are even exposed in parts of the storage APIs.
97
97
98 # revision has censor metadata, must be verified
98 # revision has censor metadata, must be verified
99 REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
99 REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
100 # revision hash does not match data (narrowhg)
100 # revision hash does not match data (narrowhg)
101 REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
101 REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
102 # revision data is stored externally
102 # revision data is stored externally
103 REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
103 REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
104 # revision changes files in a way that could affect copy tracing.
104 # revision changes files in a way that could affect copy tracing.
105 REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
105 REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
106 REVIDX_DEFAULT_FLAGS = 0
106 REVIDX_DEFAULT_FLAGS = 0
107 # stable order in which flags need to be processed and their processors applied
107 # stable order in which flags need to be processed and their processors applied
108 REVIDX_FLAGS_ORDER = [
108 REVIDX_FLAGS_ORDER = [
109 REVIDX_ISCENSORED,
109 REVIDX_ISCENSORED,
110 REVIDX_ELLIPSIS,
110 REVIDX_ELLIPSIS,
111 REVIDX_EXTSTORED,
111 REVIDX_EXTSTORED,
112 REVIDX_HASCOPIESINFO,
112 REVIDX_HASCOPIESINFO,
113 ]
113 ]
114
114
115 # bitmask for flags that could cause rawdata content change
115 # bitmask for flags that could cause rawdata content change
116 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
116 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
117
117
118 ## chunk compression mode constants:
118 ## chunk compression mode constants:
119 # These constants are used in revlog version >=2 to denote the compression used
119 # These constants are used in revlog version >=2 to denote the compression used
120 # for a chunk.
120 # for a chunk.
121
121
122 # Chunks use no compression; the data stored on disk can be used directly as
123 # the chunk value, without any header information prefixed.
124 COMP_MODE_PLAIN = 0
125
122 # Chunks use a compression mode stored "inline" at the start of the chunk
126 # Chunks use a compression mode stored "inline" at the start of the chunk
123 # itself. This is the mode always used for revlog versions "0" and "1".
127 # itself. This is the mode always used for revlog versions "0" and "1".
124 COMP_MODE_INLINE = 2
128 COMP_MODE_INLINE = 2
125
129
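A minimal sketch (the helper name is hypothetical) of how the two low bits of the mode byte described in the v2 entry layout above map onto these constants:

    def _data_comp_mode(mode_byte):
        # keep only the 2 lower bits reserved for the data compression mode
        return mode_byte & 0b11

    assert _data_comp_mode(0b00) == COMP_MODE_PLAIN
    assert _data_comp_mode(0b10) == COMP_MODE_INLINE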
126 SUPPORTED_FLAGS = {
130 SUPPORTED_FLAGS = {
127 REVLOGV0: REVLOGV0_FLAGS,
131 REVLOGV0: REVLOGV0_FLAGS,
128 REVLOGV1: REVLOGV1_FLAGS,
132 REVLOGV1: REVLOGV1_FLAGS,
129 REVLOGV2: REVLOGV2_FLAGS,
133 REVLOGV2: REVLOGV2_FLAGS,
130 }
134 }
131
135
132 _no = lambda flags: False
136 _no = lambda flags: False
133 _yes = lambda flags: True
137 _yes = lambda flags: True
134
138
135
139
136 def _from_flag(flag):
140 def _from_flag(flag):
137 return lambda flags: bool(flags & flag)
141 return lambda flags: bool(flags & flag)
138
142
139
143
140 FEATURES_BY_VERSION = {
144 FEATURES_BY_VERSION = {
141 REVLOGV0: {
145 REVLOGV0: {
142 b'inline': _no,
146 b'inline': _no,
143 b'generaldelta': _no,
147 b'generaldelta': _no,
144 b'sidedata': False,
148 b'sidedata': False,
145 b'docket': False,
149 b'docket': False,
146 },
150 },
147 REVLOGV1: {
151 REVLOGV1: {
148 b'inline': _from_flag(FLAG_INLINE_DATA),
152 b'inline': _from_flag(FLAG_INLINE_DATA),
149 b'generaldelta': _from_flag(FLAG_GENERALDELTA),
153 b'generaldelta': _from_flag(FLAG_GENERALDELTA),
150 b'sidedata': False,
154 b'sidedata': False,
151 b'docket': False,
155 b'docket': False,
152 },
156 },
153 REVLOGV2: {
157 REVLOGV2: {
154 # The point of inline-revlog is to reduce the number of files used in
158 # The point of inline-revlog is to reduce the number of files used in
155 # the store. Using a docket defeats this purpose. So we need other
159 # the store. Using a docket defeats this purpose. So we need other
156 # means to reduce the number of files for revlogv2.
160 # means to reduce the number of files for revlogv2.
157 b'inline': _no,
161 b'inline': _no,
158 b'generaldelta': _yes,
162 b'generaldelta': _yes,
159 b'sidedata': True,
163 b'sidedata': True,
160 b'docket': True,
164 b'docket': True,
161 },
165 },
162 }
166 }
163
167
164
168
165 SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000
169 SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000
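As an illustrative sketch (variable names invented) of how a raw header integer splits into a version and flags before consulting the feature table above:

    _header = REVLOG_DEFAULT_VERSION
    _version = _header & 0xFFFF          # low 16 bits: format version
    _flags = _header & ~0xFFFF           # high bits: global header flags
    _features = FEATURES_BY_VERSION[_version]
    assert _features[b'inline'](_flags)            # v1 default includes inline data
    assert not _features[b'generaldelta'](_flags)  # FLAG_GENERALDELTA not set
    assert _features[b'sidedata'] is False         # plain bool, not a predicate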
@@ -1,1088 +1,1106 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import collections
12 import collections
13 import struct
13 import struct
14
14
15 # import stuff from node for others to import from revlog
15 # import stuff from node for others to import from revlog
16 from ..node import nullrev
16 from ..node import nullrev
17 from ..i18n import _
17 from ..i18n import _
18 from ..pycompat import getattr
18 from ..pycompat import getattr
19
19
20 from .constants import (
20 from .constants import (
21 REVIDX_ISCENSORED,
21 REVIDX_ISCENSORED,
22 REVIDX_RAWTEXT_CHANGING_FLAGS,
22 REVIDX_RAWTEXT_CHANGING_FLAGS,
23 )
23 )
24
24
25 from ..thirdparty import attr
25 from ..thirdparty import attr
26
26
27 from .. import (
27 from .. import (
28 error,
28 error,
29 mdiff,
29 mdiff,
30 util,
30 util,
31 )
31 )
32
32
33 from . import flagutil
33 from . import flagutil
34
34
35 # maximum <delta-chain-data>/<revision-text-length> ratio
35 # maximum <delta-chain-data>/<revision-text-length> ratio
36 LIMIT_DELTA2TEXT = 2
36 LIMIT_DELTA2TEXT = 2
37
37
38
38
39 class _testrevlog(object):
39 class _testrevlog(object):
40 """minimalist fake revlog to use in doctests"""
40 """minimalist fake revlog to use in doctests"""
41
41
42 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
42 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
43 """data is an list of revision payload boundaries"""
43 """data is an list of revision payload boundaries"""
44 self._data = data
44 self._data = data
45 self._srdensitythreshold = density
45 self._srdensitythreshold = density
46 self._srmingapsize = mingap
46 self._srmingapsize = mingap
47 self._snapshot = set(snapshot)
47 self._snapshot = set(snapshot)
48 self.index = None
48 self.index = None
49
49
50 def start(self, rev):
50 def start(self, rev):
51 if rev == nullrev:
51 if rev == nullrev:
52 return 0
52 return 0
53 if rev == 0:
53 if rev == 0:
54 return 0
54 return 0
55 return self._data[rev - 1]
55 return self._data[rev - 1]
56
56
57 def end(self, rev):
57 def end(self, rev):
58 if rev == nullrev:
58 if rev == nullrev:
59 return 0
59 return 0
60 return self._data[rev]
60 return self._data[rev]
61
61
62 def length(self, rev):
62 def length(self, rev):
63 return self.end(rev) - self.start(rev)
63 return self.end(rev) - self.start(rev)
64
64
65 def __len__(self):
65 def __len__(self):
66 return len(self._data)
66 return len(self._data)
67
67
68 def issnapshot(self, rev):
68 def issnapshot(self, rev):
69 if rev == nullrev:
69 if rev == nullrev:
70 return True
70 return True
71 return rev in self._snapshot
71 return rev in self._snapshot
72
72
73
73
74 def slicechunk(revlog, revs, targetsize=None):
74 def slicechunk(revlog, revs, targetsize=None):
75 """slice revs to reduce the amount of unrelated data to be read from disk.
75 """slice revs to reduce the amount of unrelated data to be read from disk.
76
76
77 ``revs`` is sliced into groups that should be read in one go.
77 ``revs`` is sliced into groups that should be read in one go.
78 Assume that revs are sorted.
78 Assume that revs are sorted.
79
79
80 The initial chunk is sliced until the overall density (payload/chunks-span
80 The initial chunk is sliced until the overall density (payload/chunks-span
81 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
81 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
82 `revlog._srmingapsize` is skipped.
82 `revlog._srmingapsize` is skipped.
83
83
84 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
84 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
85 For consistency with other slicing choices, this limit won't go lower than
85 For consistency with other slicing choices, this limit won't go lower than
86 `revlog._srmingapsize`.
86 `revlog._srmingapsize`.
87
87
88 If individual revision chunks are larger than this limit, they will still
88 If individual revision chunks are larger than this limit, they will still
89 be yielded individually.
89 be yielded individually.
90
90
91 >>> data = [
91 >>> data = [
92 ... 5, #00 (5)
92 ... 5, #00 (5)
93 ... 10, #01 (5)
93 ... 10, #01 (5)
94 ... 12, #02 (2)
94 ... 12, #02 (2)
95 ... 12, #03 (empty)
95 ... 12, #03 (empty)
96 ... 27, #04 (15)
96 ... 27, #04 (15)
97 ... 31, #05 (4)
97 ... 31, #05 (4)
98 ... 31, #06 (empty)
98 ... 31, #06 (empty)
99 ... 42, #07 (11)
99 ... 42, #07 (11)
100 ... 47, #08 (5)
100 ... 47, #08 (5)
101 ... 47, #09 (empty)
101 ... 47, #09 (empty)
102 ... 48, #10 (1)
102 ... 48, #10 (1)
103 ... 51, #11 (3)
103 ... 51, #11 (3)
104 ... 74, #12 (23)
104 ... 74, #12 (23)
105 ... 85, #13 (11)
105 ... 85, #13 (11)
106 ... 86, #14 (1)
106 ... 86, #14 (1)
107 ... 91, #15 (5)
107 ... 91, #15 (5)
108 ... ]
108 ... ]
109 >>> revlog = _testrevlog(data, snapshot=range(16))
109 >>> revlog = _testrevlog(data, snapshot=range(16))
110
110
111 >>> list(slicechunk(revlog, list(range(16))))
111 >>> list(slicechunk(revlog, list(range(16))))
112 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
112 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
113 >>> list(slicechunk(revlog, [0, 15]))
113 >>> list(slicechunk(revlog, [0, 15]))
114 [[0], [15]]
114 [[0], [15]]
115 >>> list(slicechunk(revlog, [0, 11, 15]))
115 >>> list(slicechunk(revlog, [0, 11, 15]))
116 [[0], [11], [15]]
116 [[0], [11], [15]]
117 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
117 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
118 [[0], [11, 13, 15]]
118 [[0], [11, 13, 15]]
119 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
119 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
120 [[1, 2], [5, 8, 10, 11], [14]]
120 [[1, 2], [5, 8, 10, 11], [14]]
121
121
122 Slicing with a maximum chunk size
122 Slicing with a maximum chunk size
123 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
123 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
124 [[0], [11], [13], [15]]
124 [[0], [11], [13], [15]]
125 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
125 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
126 [[0], [11], [13, 15]]
126 [[0], [11], [13, 15]]
127
127
128 Slicing involving nullrev
128 Slicing involving nullrev
129 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
129 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
130 [[-1, 0], [11], [13, 15]]
130 [[-1, 0], [11], [13, 15]]
131 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
131 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
132 [[-1], [13], [15]]
132 [[-1], [13], [15]]
133 """
133 """
134 if targetsize is not None:
134 if targetsize is not None:
135 targetsize = max(targetsize, revlog._srmingapsize)
135 targetsize = max(targetsize, revlog._srmingapsize)
136 # targetsize should not be specified when evaluating delta candidates:
136 # targetsize should not be specified when evaluating delta candidates:
137 # * targetsize is used to ensure we stay within specification when reading,
137 # * targetsize is used to ensure we stay within specification when reading,
138 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
138 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
139 if densityslicing is None:
139 if densityslicing is None:
140 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
140 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
141 for chunk in densityslicing(
141 for chunk in densityslicing(
142 revs, revlog._srdensitythreshold, revlog._srmingapsize
142 revs, revlog._srdensitythreshold, revlog._srmingapsize
143 ):
143 ):
144 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
144 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
145 yield subchunk
145 yield subchunk
146
146
147
147
148 def _slicechunktosize(revlog, revs, targetsize=None):
148 def _slicechunktosize(revlog, revs, targetsize=None):
149 """slice revs to match the target size
149 """slice revs to match the target size
150
150
151 This is intended to be used on chunks that density slicing selected but that
151 This is intended to be used on chunks that density slicing selected but that
152 are still too large compared to the read guarantee of revlog. This might
152 are still too large compared to the read guarantee of revlog. This might
153 happen when "minimal gap size" interrupted the slicing or when chains are
153 happen when "minimal gap size" interrupted the slicing or when chains are
154 built in a way that creates large blocks next to each other.
154 built in a way that creates large blocks next to each other.
155
155
156 >>> data = [
156 >>> data = [
157 ... 3, #0 (3)
157 ... 3, #0 (3)
158 ... 5, #1 (2)
158 ... 5, #1 (2)
159 ... 6, #2 (1)
159 ... 6, #2 (1)
160 ... 8, #3 (2)
160 ... 8, #3 (2)
161 ... 8, #4 (empty)
161 ... 8, #4 (empty)
162 ... 11, #5 (3)
162 ... 11, #5 (3)
163 ... 12, #6 (1)
163 ... 12, #6 (1)
164 ... 13, #7 (1)
164 ... 13, #7 (1)
165 ... 14, #8 (1)
165 ... 14, #8 (1)
166 ... ]
166 ... ]
167
167
168 == All snapshots cases ==
168 == All snapshots cases ==
169 >>> revlog = _testrevlog(data, snapshot=range(9))
169 >>> revlog = _testrevlog(data, snapshot=range(9))
170
170
171 Cases where chunk is already small enough
171 Cases where chunk is already small enough
172 >>> list(_slicechunktosize(revlog, [0], 3))
172 >>> list(_slicechunktosize(revlog, [0], 3))
173 [[0]]
173 [[0]]
174 >>> list(_slicechunktosize(revlog, [6, 7], 3))
174 >>> list(_slicechunktosize(revlog, [6, 7], 3))
175 [[6, 7]]
175 [[6, 7]]
176 >>> list(_slicechunktosize(revlog, [0], None))
176 >>> list(_slicechunktosize(revlog, [0], None))
177 [[0]]
177 [[0]]
178 >>> list(_slicechunktosize(revlog, [6, 7], None))
178 >>> list(_slicechunktosize(revlog, [6, 7], None))
179 [[6, 7]]
179 [[6, 7]]
180
180
181 cases where we need actual slicing
181 cases where we need actual slicing
182 >>> list(_slicechunktosize(revlog, [0, 1], 3))
182 >>> list(_slicechunktosize(revlog, [0, 1], 3))
183 [[0], [1]]
183 [[0], [1]]
184 >>> list(_slicechunktosize(revlog, [1, 3], 3))
184 >>> list(_slicechunktosize(revlog, [1, 3], 3))
185 [[1], [3]]
185 [[1], [3]]
186 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
186 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
187 [[1, 2], [3]]
187 [[1, 2], [3]]
188 >>> list(_slicechunktosize(revlog, [3, 5], 3))
188 >>> list(_slicechunktosize(revlog, [3, 5], 3))
189 [[3], [5]]
189 [[3], [5]]
190 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
190 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
191 [[3], [5]]
191 [[3], [5]]
192 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
192 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
193 [[5], [6, 7, 8]]
193 [[5], [6, 7, 8]]
194 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
194 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
195 [[0], [1, 2], [3], [5], [6, 7, 8]]
195 [[0], [1, 2], [3], [5], [6, 7, 8]]
196
196
197 Case with too large individual chunk (must return valid chunk)
197 Case with too large individual chunk (must return valid chunk)
198 >>> list(_slicechunktosize(revlog, [0, 1], 2))
198 >>> list(_slicechunktosize(revlog, [0, 1], 2))
199 [[0], [1]]
199 [[0], [1]]
200 >>> list(_slicechunktosize(revlog, [1, 3], 1))
200 >>> list(_slicechunktosize(revlog, [1, 3], 1))
201 [[1], [3]]
201 [[1], [3]]
202 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
202 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
203 [[3], [5]]
203 [[3], [5]]
204
204
205 == No Snapshot cases ==
205 == No Snapshot cases ==
206 >>> revlog = _testrevlog(data)
206 >>> revlog = _testrevlog(data)
207
207
208 Cases where chunk is already small enough
208 Cases where chunk is already small enough
209 >>> list(_slicechunktosize(revlog, [0], 3))
209 >>> list(_slicechunktosize(revlog, [0], 3))
210 [[0]]
210 [[0]]
211 >>> list(_slicechunktosize(revlog, [6, 7], 3))
211 >>> list(_slicechunktosize(revlog, [6, 7], 3))
212 [[6, 7]]
212 [[6, 7]]
213 >>> list(_slicechunktosize(revlog, [0], None))
213 >>> list(_slicechunktosize(revlog, [0], None))
214 [[0]]
214 [[0]]
215 >>> list(_slicechunktosize(revlog, [6, 7], None))
215 >>> list(_slicechunktosize(revlog, [6, 7], None))
216 [[6, 7]]
216 [[6, 7]]
217
217
218 cases where we need actual slicing
218 cases where we need actual slicing
219 >>> list(_slicechunktosize(revlog, [0, 1], 3))
219 >>> list(_slicechunktosize(revlog, [0, 1], 3))
220 [[0], [1]]
220 [[0], [1]]
221 >>> list(_slicechunktosize(revlog, [1, 3], 3))
221 >>> list(_slicechunktosize(revlog, [1, 3], 3))
222 [[1], [3]]
222 [[1], [3]]
223 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
223 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
224 [[1], [2, 3]]
224 [[1], [2, 3]]
225 >>> list(_slicechunktosize(revlog, [3, 5], 3))
225 >>> list(_slicechunktosize(revlog, [3, 5], 3))
226 [[3], [5]]
226 [[3], [5]]
227 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
227 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
228 [[3], [4, 5]]
228 [[3], [4, 5]]
229 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
229 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
230 [[5], [6, 7, 8]]
230 [[5], [6, 7, 8]]
231 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
231 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
232 [[0], [1, 2], [3], [5], [6, 7, 8]]
232 [[0], [1, 2], [3], [5], [6, 7, 8]]
233
233
234 Case with too large individual chunk (must return valid chunk)
234 Case with too large individual chunk (must return valid chunk)
235 >>> list(_slicechunktosize(revlog, [0, 1], 2))
235 >>> list(_slicechunktosize(revlog, [0, 1], 2))
236 [[0], [1]]
236 [[0], [1]]
237 >>> list(_slicechunktosize(revlog, [1, 3], 1))
237 >>> list(_slicechunktosize(revlog, [1, 3], 1))
238 [[1], [3]]
238 [[1], [3]]
239 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
239 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
240 [[3], [5]]
240 [[3], [5]]
241
241
242 == mixed case ==
242 == mixed case ==
243 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
243 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
244 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
244 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
245 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
245 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
246 """
246 """
247 assert targetsize is None or 0 <= targetsize
247 assert targetsize is None or 0 <= targetsize
248 startdata = revlog.start(revs[0])
248 startdata = revlog.start(revs[0])
249 enddata = revlog.end(revs[-1])
249 enddata = revlog.end(revs[-1])
250 fullspan = enddata - startdata
250 fullspan = enddata - startdata
251 if targetsize is None or fullspan <= targetsize:
251 if targetsize is None or fullspan <= targetsize:
252 yield revs
252 yield revs
253 return
253 return
254
254
255 startrevidx = 0
255 startrevidx = 0
256 endrevidx = 1
256 endrevidx = 1
257 iterrevs = enumerate(revs)
257 iterrevs = enumerate(revs)
258 next(iterrevs) # skip first rev.
258 next(iterrevs) # skip first rev.
259 # first step: get snapshots out of the way
259 # first step: get snapshots out of the way
260 for idx, r in iterrevs:
260 for idx, r in iterrevs:
261 span = revlog.end(r) - startdata
261 span = revlog.end(r) - startdata
262 snapshot = revlog.issnapshot(r)
262 snapshot = revlog.issnapshot(r)
263 if span <= targetsize and snapshot:
263 if span <= targetsize and snapshot:
264 endrevidx = idx + 1
264 endrevidx = idx + 1
265 else:
265 else:
266 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
266 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
267 if chunk:
267 if chunk:
268 yield chunk
268 yield chunk
269 startrevidx = idx
269 startrevidx = idx
270 startdata = revlog.start(r)
270 startdata = revlog.start(r)
271 endrevidx = idx + 1
271 endrevidx = idx + 1
272 if not snapshot:
272 if not snapshot:
273 break
273 break
274
274
275 # for the others, we use binary slicing to quickly converge toward valid
275 # for the others, we use binary slicing to quickly converge toward valid
276 # chunks (otherwise, we might end up looking for start/end of many
276 # chunks (otherwise, we might end up looking for start/end of many
277 # revisions). This logic is not looking for the perfect slicing point; it
277 # revisions). This logic is not looking for the perfect slicing point; it
278 # focuses on quickly converging toward valid chunks.
278 # focuses on quickly converging toward valid chunks.
279 nbitem = len(revs)
279 nbitem = len(revs)
280 while (enddata - startdata) > targetsize:
280 while (enddata - startdata) > targetsize:
281 endrevidx = nbitem
281 endrevidx = nbitem
282 if nbitem - startrevidx <= 1:
282 if nbitem - startrevidx <= 1:
283 break # protect against individual chunk larger than limit
283 break # protect against individual chunk larger than limit
284 localenddata = revlog.end(revs[endrevidx - 1])
284 localenddata = revlog.end(revs[endrevidx - 1])
285 span = localenddata - startdata
285 span = localenddata - startdata
286 while span > targetsize:
286 while span > targetsize:
287 if endrevidx - startrevidx <= 1:
287 if endrevidx - startrevidx <= 1:
288 break # protect against individual chunk larger than limit
288 break # protect against individual chunk larger than limit
289 endrevidx -= (endrevidx - startrevidx) // 2
289 endrevidx -= (endrevidx - startrevidx) // 2
290 localenddata = revlog.end(revs[endrevidx - 1])
290 localenddata = revlog.end(revs[endrevidx - 1])
291 span = localenddata - startdata
291 span = localenddata - startdata
292 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
292 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
293 if chunk:
293 if chunk:
294 yield chunk
294 yield chunk
295 startrevidx = endrevidx
295 startrevidx = endrevidx
296 startdata = revlog.start(revs[startrevidx])
296 startdata = revlog.start(revs[startrevidx])
297
297
298 chunk = _trimchunk(revlog, revs, startrevidx)
298 chunk = _trimchunk(revlog, revs, startrevidx)
299 if chunk:
299 if chunk:
300 yield chunk
300 yield chunk
301
301
302
302
303 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
303 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
304 """slice revs to reduce the amount of unrelated data to be read from disk.
304 """slice revs to reduce the amount of unrelated data to be read from disk.
305
305
306 ``revs`` is sliced into groups that should be read in a single read.
306 ``revs`` is sliced into groups that should be read in a single read.
307 ``revs`` is assumed to be sorted.
307 ``revs`` is assumed to be sorted.
308
308
309 The initial chunk is sliced until the overall density (payload/chunks-span
309 The initial chunk is sliced until the overall density (payload/chunks-span
310 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
310 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
311 skipped.
311 skipped.
312
312
313 >>> revlog = _testrevlog([
313 >>> revlog = _testrevlog([
314 ... 5, #00 (5)
314 ... 5, #00 (5)
315 ... 10, #01 (5)
315 ... 10, #01 (5)
316 ... 12, #02 (2)
316 ... 12, #02 (2)
317 ... 12, #03 (empty)
317 ... 12, #03 (empty)
318 ... 27, #04 (15)
318 ... 27, #04 (15)
319 ... 31, #05 (4)
319 ... 31, #05 (4)
320 ... 31, #06 (empty)
320 ... 31, #06 (empty)
321 ... 42, #07 (11)
321 ... 42, #07 (11)
322 ... 47, #08 (5)
322 ... 47, #08 (5)
323 ... 47, #09 (empty)
323 ... 47, #09 (empty)
324 ... 48, #10 (1)
324 ... 48, #10 (1)
325 ... 51, #11 (3)
325 ... 51, #11 (3)
326 ... 74, #12 (23)
326 ... 74, #12 (23)
327 ... 85, #13 (11)
327 ... 85, #13 (11)
328 ... 86, #14 (1)
328 ... 86, #14 (1)
329 ... 91, #15 (5)
329 ... 91, #15 (5)
330 ... ])
330 ... ])
331
331
332 >>> list(_slicechunktodensity(revlog, list(range(16))))
332 >>> list(_slicechunktodensity(revlog, list(range(16))))
333 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
333 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
334 >>> list(_slicechunktodensity(revlog, [0, 15]))
334 >>> list(_slicechunktodensity(revlog, [0, 15]))
335 [[0], [15]]
335 [[0], [15]]
336 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
336 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
337 [[0], [11], [15]]
337 [[0], [11], [15]]
338 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
338 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
339 [[0], [11, 13, 15]]
339 [[0], [11, 13, 15]]
340 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
340 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
341 [[1, 2], [5, 8, 10, 11], [14]]
341 [[1, 2], [5, 8, 10, 11], [14]]
342 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
342 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
343 ... mingapsize=20))
343 ... mingapsize=20))
344 [[1, 2, 3, 5, 8, 10, 11], [14]]
344 [[1, 2, 3, 5, 8, 10, 11], [14]]
345 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
345 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
346 ... targetdensity=0.95))
346 ... targetdensity=0.95))
347 [[1, 2], [5], [8, 10, 11], [14]]
347 [[1, 2], [5], [8, 10, 11], [14]]
348 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
348 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
349 ... targetdensity=0.95, mingapsize=12))
349 ... targetdensity=0.95, mingapsize=12))
350 [[1, 2], [5, 8, 10, 11], [14]]
350 [[1, 2], [5, 8, 10, 11], [14]]
351 """
351 """
352 start = revlog.start
352 start = revlog.start
353 length = revlog.length
353 length = revlog.length
354
354
355 if len(revs) <= 1:
355 if len(revs) <= 1:
356 yield revs
356 yield revs
357 return
357 return
358
358
359 deltachainspan = segmentspan(revlog, revs)
359 deltachainspan = segmentspan(revlog, revs)
360
360
361 if deltachainspan < mingapsize:
361 if deltachainspan < mingapsize:
362 yield revs
362 yield revs
363 return
363 return
364
364
365 readdata = deltachainspan
365 readdata = deltachainspan
366 chainpayload = sum(length(r) for r in revs)
366 chainpayload = sum(length(r) for r in revs)
367
367
368 if deltachainspan:
368 if deltachainspan:
369 density = chainpayload / float(deltachainspan)
369 density = chainpayload / float(deltachainspan)
370 else:
370 else:
371 density = 1.0
371 density = 1.0
372
372
373 if density >= targetdensity:
373 if density >= targetdensity:
374 yield revs
374 yield revs
375 return
375 return
376
376
377 # Collect the gaps between revisions, to be sorted by increasing size below
377 # Collect the gaps between revisions, to be sorted by increasing size below
378 gaps = []
378 gaps = []
379 prevend = None
379 prevend = None
380 for i, rev in enumerate(revs):
380 for i, rev in enumerate(revs):
381 revstart = start(rev)
381 revstart = start(rev)
382 revlen = length(rev)
382 revlen = length(rev)
383
383
384 # Skip empty revisions to form larger holes
384 # Skip empty revisions to form larger holes
385 if revlen == 0:
385 if revlen == 0:
386 continue
386 continue
387
387
388 if prevend is not None:
388 if prevend is not None:
389 gapsize = revstart - prevend
389 gapsize = revstart - prevend
390 # only consider holes that are large enough
390 # only consider holes that are large enough
391 if gapsize > mingapsize:
391 if gapsize > mingapsize:
392 gaps.append((gapsize, i))
392 gaps.append((gapsize, i))
393
393
394 prevend = revstart + revlen
394 prevend = revstart + revlen
395 # sort the gaps so that pop() returns them from largest to smallest
395 # sort the gaps so that pop() returns them from largest to smallest
396 gaps.sort()
396 gaps.sort()
397
397
398 # Collect the indices of the largest holes until the density is acceptable
398 # Collect the indices of the largest holes until the density is acceptable
399 selected = []
399 selected = []
400 while gaps and density < targetdensity:
400 while gaps and density < targetdensity:
401 gapsize, gapidx = gaps.pop()
401 gapsize, gapidx = gaps.pop()
402
402
403 selected.append(gapidx)
403 selected.append(gapidx)
404
404
405 # the gaps were sorted in increasing order, so pop() above returned
405 # the gaps were sorted in increasing order, so pop() above returned
406 # the largest remaining gap; skipping it shrinks the data we read
406 # the largest remaining gap; skipping it shrinks the data we read
407 readdata -= gapsize
407 readdata -= gapsize
408 if readdata > 0:
408 if readdata > 0:
409 density = chainpayload / float(readdata)
409 density = chainpayload / float(readdata)
410 else:
410 else:
411 density = 1.0
411 density = 1.0
412 selected.sort()
412 selected.sort()
413
413
414 # Cut the revs at collected indices
414 # Cut the revs at collected indices
415 previdx = 0
415 previdx = 0
416 for idx in selected:
416 for idx in selected:
417
417
418 chunk = _trimchunk(revlog, revs, previdx, idx)
418 chunk = _trimchunk(revlog, revs, previdx, idx)
419 if chunk:
419 if chunk:
420 yield chunk
420 yield chunk
421
421
422 previdx = idx
422 previdx = idx
423
423
424 chunk = _trimchunk(revlog, revs, previdx)
424 chunk = _trimchunk(revlog, revs, previdx)
425 if chunk:
425 if chunk:
426 yield chunk
426 yield chunk
427
427
428
428
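# Illustrative sketch (not part of revlog.py): the density decision above in
# miniature, using the same end-offset convention as the doctests. Density is
# payload / span; when it falls below the target, the largest gaps get sliced
# out. All names below are local to this sketch.

def _demo_density(offsets, revs):
    start = lambda r: 0 if r == 0 else offsets[r - 1]
    length = lambda r: offsets[r] - start(r)
    span = offsets[revs[-1]] - start(revs[0])
    payload = sum(length(r) for r in revs)
    return payload / float(span) if span else 1.0

_offsets = [5, 10, 12, 12, 27, 31, 31, 42, 47, 47, 48, 51, 74, 85, 86, 91]
# 24 payload bytes spread over a 91-byte span: density ~0.26, below the
# default 0.5 target, so [0, 11, 13, 15] is sliced into [0] and [11, 13, 15].
assert _demo_density(_offsets, [0, 11, 13, 15]) < 0.5
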
429 def _trimchunk(revlog, revs, startidx, endidx=None):
429 def _trimchunk(revlog, revs, startidx, endidx=None):
430 """returns revs[startidx:endidx] without empty trailing revs
430 """returns revs[startidx:endidx] without empty trailing revs
431
431
432 Doctest Setup
432 Doctest Setup
433 >>> revlog = _testrevlog([
433 >>> revlog = _testrevlog([
434 ... 5, #0
434 ... 5, #0
435 ... 10, #1
435 ... 10, #1
436 ... 12, #2
436 ... 12, #2
437 ... 12, #3 (empty)
437 ... 12, #3 (empty)
438 ... 17, #4
438 ... 17, #4
439 ... 21, #5
439 ... 21, #5
440 ... 21, #6 (empty)
440 ... 21, #6 (empty)
441 ... ])
441 ... ])
442
442
443 Contiguous cases:
443 Contiguous cases:
444 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
444 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
445 [0, 1, 2, 3, 4, 5]
445 [0, 1, 2, 3, 4, 5]
446 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
446 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
447 [0, 1, 2, 3, 4]
447 [0, 1, 2, 3, 4]
448 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
448 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
449 [0, 1, 2]
449 [0, 1, 2]
450 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
450 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
451 [2]
451 [2]
452 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
452 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
453 [3, 4, 5]
453 [3, 4, 5]
454 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
454 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
455 [3, 4]
455 [3, 4]
456
456
457 Discontiguous cases:
457 Discontiguous cases:
458 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
458 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
459 [1, 3, 5]
459 [1, 3, 5]
460 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
460 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
461 [1]
461 [1]
462 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
462 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
463 [3, 5]
463 [3, 5]
464 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
464 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
465 [3, 5]
465 [3, 5]
466 """
466 """
467 length = revlog.length
467 length = revlog.length
468
468
469 if endidx is None:
469 if endidx is None:
470 endidx = len(revs)
470 endidx = len(revs)
471
471
472 # If we have a non-empty delta candidate, there is nothing to trim
472 # If we have a non-empty delta candidate, there is nothing to trim
473 if revs[endidx - 1] < len(revlog):
473 if revs[endidx - 1] < len(revlog):
474 # Trim empty revs at the end, except the very first revision of a chain
474 # Trim empty revs at the end, except the very first revision of a chain
475 while (
475 while (
476 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
476 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
477 ):
477 ):
478 endidx -= 1
478 endidx -= 1
479
479
480 return revs[startidx:endidx]
480 return revs[startidx:endidx]
481
481
482
482
483 def segmentspan(revlog, revs):
483 def segmentspan(revlog, revs):
484 """Get the byte span of a segment of revisions
484 """Get the byte span of a segment of revisions
485
485
486 revs is a sorted array of revision numbers
486 revs is a sorted array of revision numbers
487
487
488 >>> revlog = _testrevlog([
488 >>> revlog = _testrevlog([
489 ... 5, #0
489 ... 5, #0
490 ... 10, #1
490 ... 10, #1
491 ... 12, #2
491 ... 12, #2
492 ... 12, #3 (empty)
492 ... 12, #3 (empty)
493 ... 17, #4
493 ... 17, #4
494 ... ])
494 ... ])
495
495
496 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
496 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
497 17
497 17
498 >>> segmentspan(revlog, [0, 4])
498 >>> segmentspan(revlog, [0, 4])
499 17
499 17
500 >>> segmentspan(revlog, [3, 4])
500 >>> segmentspan(revlog, [3, 4])
501 5
501 5
502 >>> segmentspan(revlog, [1, 2, 3,])
502 >>> segmentspan(revlog, [1, 2, 3,])
503 7
503 7
504 >>> segmentspan(revlog, [1, 3])
504 >>> segmentspan(revlog, [1, 3])
505 7
505 7
506 """
506 """
507 if not revs:
507 if not revs:
508 return 0
508 return 0
509 end = revlog.end(revs[-1])
509 end = revlog.end(revs[-1])
510 return end - revlog.start(revs[0])
510 return end - revlog.start(revs[0])
511
511
512
512
513 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
513 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
514 """build full text from a (base, delta) pair and other metadata"""
514 """build full text from a (base, delta) pair and other metadata"""
515 # special case deltas which replace entire base; no need to decode
515 # special case deltas which replace entire base; no need to decode
516 # base revision. this neatly avoids censored bases, which throw when
516 # base revision. this neatly avoids censored bases, which throw when
517 # they're decoded.
517 # they're decoded.
518 hlen = struct.calcsize(b">lll")
518 hlen = struct.calcsize(b">lll")
519 if delta[:hlen] == mdiff.replacediffheader(
519 if delta[:hlen] == mdiff.replacediffheader(
520 revlog.rawsize(baserev), len(delta) - hlen
520 revlog.rawsize(baserev), len(delta) - hlen
521 ):
521 ):
522 fulltext = delta[hlen:]
522 fulltext = delta[hlen:]
523 else:
523 else:
524 # deltabase is rawtext before changed by flag processors, which is
524 # deltabase is rawtext before changed by flag processors, which is
525 # equivalent to non-raw text
525 # equivalent to non-raw text
526 basetext = revlog.revision(baserev, _df=fh, raw=False)
526 basetext = revlog.revision(baserev, _df=fh, raw=False)
527 fulltext = mdiff.patch(basetext, delta)
527 fulltext = mdiff.patch(basetext, delta)
528
528
529 try:
529 try:
530 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
530 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
531 if validatehash:
531 if validatehash:
532 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
532 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
533 if flags & REVIDX_ISCENSORED:
533 if flags & REVIDX_ISCENSORED:
534 raise error.StorageError(
534 raise error.StorageError(
535 _(b'node %s is not censored') % expectednode
535 _(b'node %s is not censored') % expectednode
536 )
536 )
537 except error.CensoredNodeError:
537 except error.CensoredNodeError:
538 # must pass the censored index flag to add censored revisions
538 # must pass the censored index flag to add censored revisions
539 if not flags & REVIDX_ISCENSORED:
539 if not flags & REVIDX_ISCENSORED:
540 raise
540 raise
541 return fulltext
541 return fulltext
542
542
543
543
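# Illustrative sketch (not part of revlog.py): the special case matched in
# _textfromdelta above. A bdiff-style delta is a sequence of ">lll"
# (start, end, newlength) hunks, each followed by that many bytes of
# replacement data. A delta whose single hunk replaces the entire base thus
# starts with a 12-byte header, and the fulltext is everything after it.

import struct

_base = b'some old base text'
_new = b'a completely new text'
_delta = struct.pack(b'>lll', 0, len(_base), len(_new)) + _new

_hlen = struct.calcsize(b'>lll')
assert _delta[:_hlen] == struct.pack(b'>lll', 0, len(_base), len(_new))
assert _delta[_hlen:] == _new  # no need to decode the base revision
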
544 @attr.s(slots=True, frozen=True)
544 @attr.s(slots=True, frozen=True)
545 class _deltainfo(object):
545 class _deltainfo(object):
546 distance = attr.ib()
546 distance = attr.ib()
547 deltalen = attr.ib()
547 deltalen = attr.ib()
548 data = attr.ib()
548 data = attr.ib()
549 base = attr.ib()
549 base = attr.ib()
550 chainbase = attr.ib()
550 chainbase = attr.ib()
551 chainlen = attr.ib()
551 chainlen = attr.ib()
552 compresseddeltalen = attr.ib()
552 compresseddeltalen = attr.ib()
553 snapshotdepth = attr.ib()
553 snapshotdepth = attr.ib()
554
554
555
555
556 def drop_u_compression(delta):
557 """turn into a "u" (no-compression) into no-compression without header
558
559 This is useful for revlog format that has better compression method.
560 """
561 assert delta.data[0] == b'u', delta.data[0]
562 return _deltainfo(
563 delta.distance,
564 delta.deltalen - 1,
565 (b'', delta.data[1]),
566 delta.base,
567 delta.chainbase,
568 delta.chainlen,
569 delta.compresseddeltalen,
570 delta.snapshotdepth,
571 )
572
573
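# Illustrative sketch (not part of revlog.py): revlog.compress() returns a
# (header, data) pair, where a b'u' header marks data kept uncompressed.
# Formats that record the compression mode out of band (COMP_MODE_PLAIN) do
# not need that in-band marker, so drop_u_compression strips it and shrinks
# deltalen by the single byte the header occupied.

_compressed = (b'u', b'raw delta bytes')     # as produced by revlog.compress()
assert _compressed[0] == b'u'
_plain = (b'', _compressed[1])               # what gets stored instead
assert sum(map(len, _plain)) == sum(map(len, _compressed)) - 1
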
556 def isgooddeltainfo(revlog, deltainfo, revinfo):
574 def isgooddeltainfo(revlog, deltainfo, revinfo):
557 """Returns True if the given delta is good. Good means that it is within
575 """Returns True if the given delta is good. Good means that it is within
558 the disk span, disk size, and chain length bounds that we know to be
576 the disk span, disk size, and chain length bounds that we know to be
559 performant."""
577 performant."""
560 if deltainfo is None:
578 if deltainfo is None:
561 return False
579 return False
562
580
563 # - 'deltainfo.distance' is the distance from the base revision --
581 # - 'deltainfo.distance' is the distance from the base revision --
564 # bounding it limits the amount of I/O we need to do.
582 # bounding it limits the amount of I/O we need to do.
565 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
583 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
566 # deltas we need to apply -- bounding it limits the amount of CPU
584 # deltas we need to apply -- bounding it limits the amount of CPU
567 # we consume.
585 # we consume.
568
586
569 textlen = revinfo.textlen
587 textlen = revinfo.textlen
570 defaultmax = textlen * 4
588 defaultmax = textlen * 4
571 maxdist = revlog._maxdeltachainspan
589 maxdist = revlog._maxdeltachainspan
572 if not maxdist:
590 if not maxdist:
573 maxdist = deltainfo.distance # ensure the conditional passes
591 maxdist = deltainfo.distance # ensure the conditional passes
574 maxdist = max(maxdist, defaultmax)
592 maxdist = max(maxdist, defaultmax)
575
593
576 # Bad delta from read span:
594 # Bad delta from read span:
577 #
595 #
578 # If the span of data read is larger than the maximum allowed.
596 # If the span of data read is larger than the maximum allowed.
579 #
597 #
580 # In the sparse-revlog case, we rely on the associated "sparse reading"
598 # In the sparse-revlog case, we rely on the associated "sparse reading"
581 # to avoid issues related to the span of data. In theory, it would be
599 # to avoid issues related to the span of data. In theory, it would be
582 # possible to build a pathological revlog where the delta pattern would
600 # possible to build a pathological revlog where the delta pattern would
583 # lead to too many reads. However, this does not happen in practice. So
601 # lead to too many reads. However, this does not happen in practice. So
584 # we skip the span check entirely.
602 # we skip the span check entirely.
585 if not revlog._sparserevlog and maxdist < deltainfo.distance:
603 if not revlog._sparserevlog and maxdist < deltainfo.distance:
586 return False
604 return False
587
605
588 # Bad delta from new delta size:
606 # Bad delta from new delta size:
589 #
607 #
590 # If the delta size is larger than the target text, storing the
608 # If the delta size is larger than the target text, storing the
591 # delta will be inefficient.
609 # delta will be inefficient.
592 if textlen < deltainfo.deltalen:
610 if textlen < deltainfo.deltalen:
593 return False
611 return False
594
612
595 # Bad delta from cumulated payload size:
613 # Bad delta from cumulated payload size:
596 #
614 #
597 # If the sum of the deltas gets larger than K * the target text length.
615 # If the sum of the deltas gets larger than K * the target text length.
598 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
616 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
599 return False
617 return False
600
618
601 # Bad delta from chain length:
619 # Bad delta from chain length:
602 #
620 #
603 # If the number of deltas in the chain gets too high.
621 # If the number of deltas in the chain gets too high.
604 if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
622 if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
605 return False
623 return False
606
624
607 # bad delta from intermediate snapshot size limit
625 # bad delta from intermediate snapshot size limit
608 #
626 #
609 # If an intermediate snapshot size is higher than the limit. The
627 # If an intermediate snapshot size is higher than the limit. The
610 # limit exists to prevent endless chains of intermediate deltas from being
628 # limit exists to prevent endless chains of intermediate deltas from being
611 # created.
629 # created.
612 if (
630 if (
613 deltainfo.snapshotdepth is not None
631 deltainfo.snapshotdepth is not None
614 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
632 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
615 ):
633 ):
616 return False
634 return False
617
635
618 # bad delta if new intermediate snapshot is larger than the previous
636 # bad delta if new intermediate snapshot is larger than the previous
619 # snapshot
637 # snapshot
620 if (
638 if (
621 deltainfo.snapshotdepth
639 deltainfo.snapshotdepth
622 and revlog.length(deltainfo.base) < deltainfo.deltalen
640 and revlog.length(deltainfo.base) < deltainfo.deltalen
623 ):
641 ):
624 return False
642 return False
625
643
626 return True
644 return True
627
645
628
646
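# Illustrative sketch (not part of revlog.py): the two simplest bounds above
# on toy numbers. With no explicit _maxdeltachainspan, the read-span budget
# defaults to four times the fulltext length, and a delta longer than the
# fulltext itself is never worth storing.

_textlen = 1000
_maxdist = _textlen * 4          # default read-span budget: 4000 bytes
assert not (5000 <= _maxdist)    # a 5000-byte span fails the distance bound
assert not (1200 <= _textlen)    # a 1200-byte delta fails the size bound
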
629 # If a revision's full text is that much bigger than a base candidate full
647 # If a revision's full text is that much bigger than a base candidate full
630 # text's, it is very unlikely that it will produce a valid delta. We no longer
648 # text's, it is very unlikely that it will produce a valid delta. We no longer
631 # consider these candidates.
649 # consider these candidates.
632 LIMIT_BASE2TEXT = 500
650 LIMIT_BASE2TEXT = 500
633
651
634
652
635 def _candidategroups(revlog, textlen, p1, p2, cachedelta):
653 def _candidategroups(revlog, textlen, p1, p2, cachedelta):
636 """Provides group of revision to be tested as delta base
654 """Provides group of revision to be tested as delta base
637
655
638 This top level function focuses on emitting groups with unique and worthwhile
656 This top level function focuses on emitting groups with unique and worthwhile
639 content. See _rawgroups for details about the group order.
657 content. See _rawgroups for details about the group order.
640 """
658 """
641 # should we try to build a delta?
659 # should we try to build a delta?
642 if not (len(revlog) and revlog._storedeltachains):
660 if not (len(revlog) and revlog._storedeltachains):
643 yield None
661 yield None
644 return
662 return
645
663
646 deltalength = revlog.length
664 deltalength = revlog.length
647 deltaparent = revlog.deltaparent
665 deltaparent = revlog.deltaparent
648 sparse = revlog._sparserevlog
666 sparse = revlog._sparserevlog
649 good = None
667 good = None
650
668
651 deltas_limit = textlen * LIMIT_DELTA2TEXT
669 deltas_limit = textlen * LIMIT_DELTA2TEXT
652
670
653 tested = {nullrev}
671 tested = {nullrev}
654 candidates = _refinedgroups(revlog, p1, p2, cachedelta)
672 candidates = _refinedgroups(revlog, p1, p2, cachedelta)
655 while True:
673 while True:
656 temptative = candidates.send(good)
674 temptative = candidates.send(good)
657 if temptative is None:
675 if temptative is None:
658 break
676 break
659 group = []
677 group = []
660 for rev in temptative:
678 for rev in temptative:
661 # skip over empty deltas (no need to include them in a chain)
679 # skip over empty deltas (no need to include them in a chain)
662 while revlog._generaldelta and not (
680 while revlog._generaldelta and not (
663 rev == nullrev or rev in tested or deltalength(rev)
681 rev == nullrev or rev in tested or deltalength(rev)
664 ):
682 ):
665 tested.add(rev)
683 tested.add(rev)
666 rev = deltaparent(rev)
684 rev = deltaparent(rev)
667 # no need to try a delta against nullrev, this will be done as a
685 # no need to try a delta against nullrev, this will be done as a
668 # last resort.
686 # last resort.
669 if rev == nullrev:
687 if rev == nullrev:
670 continue
688 continue
671 # filter out revisions we already tested
689 # filter out revisions we already tested
672 if rev in tested:
690 if rev in tested:
673 continue
691 continue
674 tested.add(rev)
692 tested.add(rev)
675 # filter out delta bases that will never produce a good delta
693 # filter out delta bases that will never produce a good delta
676 if deltas_limit < revlog.length(rev):
694 if deltas_limit < revlog.length(rev):
677 continue
695 continue
678 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
696 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
679 continue
697 continue
680 # no delta for rawtext-changing revs (see "candelta" for why)
698 # no delta for rawtext-changing revs (see "candelta" for why)
681 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
699 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
682 continue
700 continue
683 # If we reach here, we are about to build and test a delta.
701 # If we reach here, we are about to build and test a delta.
684 # The delta building process will compute the chaininfo in all
702 # The delta building process will compute the chaininfo in all
685 # cases; since that computation is cached, it is fine to access it
703 # cases; since that computation is cached, it is fine to access it
686 # here too.
704 # here too.
687 chainlen, chainsize = revlog._chaininfo(rev)
705 chainlen, chainsize = revlog._chaininfo(rev)
688 # if chain will be too long, skip base
706 # if chain will be too long, skip base
689 if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
707 if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
690 continue
708 continue
691 # if chain already have too much data, skip base
709 # if chain already have too much data, skip base
692 if deltas_limit < chainsize:
710 if deltas_limit < chainsize:
693 continue
711 continue
694 if sparse and revlog.upperboundcomp is not None:
712 if sparse and revlog.upperboundcomp is not None:
695 maxcomp = revlog.upperboundcomp
713 maxcomp = revlog.upperboundcomp
696 basenotsnap = (p1, p2, nullrev)
714 basenotsnap = (p1, p2, nullrev)
697 if rev not in basenotsnap and revlog.issnapshot(rev):
715 if rev not in basenotsnap and revlog.issnapshot(rev):
698 snapshotdepth = revlog.snapshotdepth(rev)
716 snapshotdepth = revlog.snapshotdepth(rev)
699 # If text is significantly larger than the base, we can
717 # If text is significantly larger than the base, we can
700 # expect the resulting delta to be proportional to the size
718 # expect the resulting delta to be proportional to the size
701 # difference
719 # difference
702 revsize = revlog.rawsize(rev)
720 revsize = revlog.rawsize(rev)
703 rawsizedistance = max(textlen - revsize, 0)
721 rawsizedistance = max(textlen - revsize, 0)
704 # use an estimate of the compression upper bound.
722 # use an estimate of the compression upper bound.
705 lowestrealisticdeltalen = rawsizedistance // maxcomp
723 lowestrealisticdeltalen = rawsizedistance // maxcomp
706
724
707 # check the absolute constraint on the delta size
725 # check the absolute constraint on the delta size
708 snapshotlimit = textlen >> snapshotdepth
726 snapshotlimit = textlen >> snapshotdepth
709 if snapshotlimit < lowestrealisticdeltalen:
727 if snapshotlimit < lowestrealisticdeltalen:
710 # delta lower bound is larger than accepted upper bound
728 # delta lower bound is larger than accepted upper bound
711 continue
729 continue
712
730
713 # check the relative constraint on the delta size
731 # check the relative constraint on the delta size
714 revlength = revlog.length(rev)
732 revlength = revlog.length(rev)
715 if revlength < lowestrealisticdeltalen:
733 if revlength < lowestrealisticdeltalen:
716 # delta probable lower bound is larger than target base
734 # delta probable lower bound is larger than target base
717 continue
735 continue
718
736
719 group.append(rev)
737 group.append(rev)
720 if group:
738 if group:
721 # XXX: in the sparse revlog case, group can become large,
739 # XXX: in the sparse revlog case, group can become large,
722 # impacting performance. Some bounding or slicing mechanism
740 # impacting performance. Some bounding or slicing mechanism
723 # would help to reduce this impact.
741 # would help to reduce this impact.
724 good = yield tuple(group)
742 good = yield tuple(group)
725 yield None
743 yield None
726
744
727
745
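# Illustrative sketch (not part of revlog.py): the send()-driven protocol
# between _candidategroups and its consumer. The consumer reports which
# candidate (if any) turned out to be good; the generator uses that feedback
# to pick the next group, and yields None once it is exhausted.

def _toy_groups():
    good = yield (3, 2)          # offer the cheap candidates first
    if good is None:
        good = yield (1, 0)      # fall back to older revisions
    yield None                   # nothing left to offer

_groups = _toy_groups()
_candidates = next(_groups)
while _candidates is not None:
    _best = _candidates[0]       # pretend the first candidate was good
    _candidates = _groups.send(_best)
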
728 def _findsnapshots(revlog, cache, start_rev):
746 def _findsnapshots(revlog, cache, start_rev):
729 """find snapshot from start_rev to tip"""
747 """find snapshot from start_rev to tip"""
730 if util.safehasattr(revlog.index, b'findsnapshots'):
748 if util.safehasattr(revlog.index, b'findsnapshots'):
731 revlog.index.findsnapshots(cache, start_rev)
749 revlog.index.findsnapshots(cache, start_rev)
732 else:
750 else:
733 deltaparent = revlog.deltaparent
751 deltaparent = revlog.deltaparent
734 issnapshot = revlog.issnapshot
752 issnapshot = revlog.issnapshot
735 for rev in revlog.revs(start_rev):
753 for rev in revlog.revs(start_rev):
736 if issnapshot(rev):
754 if issnapshot(rev):
737 cache[deltaparent(rev)].append(rev)
755 cache[deltaparent(rev)].append(rev)
738
756
739
757
740 def _refinedgroups(revlog, p1, p2, cachedelta):
758 def _refinedgroups(revlog, p1, p2, cachedelta):
741 good = None
759 good = None
742 # First we try to reuse the delta contained in the bundle.
760 # First we try to reuse the delta contained in the bundle.
743 # (or from the source revlog)
761 # (or from the source revlog)
744 #
762 #
745 # This logic only applies to general delta repositories and can be disabled
763 # This logic only applies to general delta repositories and can be disabled
746 # through configuration. Disabling source delta reuse is useful when
764 # through configuration. Disabling source delta reuse is useful when
747 # we want to make sure we recompute "optimal" deltas.
765 # we want to make sure we recompute "optimal" deltas.
748 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
766 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
749 # Assume what we received from the server is a good choice
767 # Assume what we received from the server is a good choice
750 # build delta will reuse the cache
768 # build delta will reuse the cache
751 good = yield (cachedelta[0],)
769 good = yield (cachedelta[0],)
752 if good is not None:
770 if good is not None:
753 yield None
771 yield None
754 return
772 return
755 snapshots = collections.defaultdict(list)
773 snapshots = collections.defaultdict(list)
756 for candidates in _rawgroups(revlog, p1, p2, cachedelta, snapshots):
774 for candidates in _rawgroups(revlog, p1, p2, cachedelta, snapshots):
757 good = yield candidates
775 good = yield candidates
758 if good is not None:
776 if good is not None:
759 break
777 break
760
778
761 # If sparse revlog is enabled, we can try to refine the available deltas
779 # If sparse revlog is enabled, we can try to refine the available deltas
762 if not revlog._sparserevlog:
780 if not revlog._sparserevlog:
763 yield None
781 yield None
764 return
782 return
765
783
766 # if we have a refinable value, try to refine it
784 # if we have a refinable value, try to refine it
767 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
785 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
768 # refine snapshot down
786 # refine snapshot down
769 previous = None
787 previous = None
770 while previous != good:
788 while previous != good:
771 previous = good
789 previous = good
772 base = revlog.deltaparent(good)
790 base = revlog.deltaparent(good)
773 if base == nullrev:
791 if base == nullrev:
774 break
792 break
775 good = yield (base,)
793 good = yield (base,)
776 # refine snapshot up
794 # refine snapshot up
777 if not snapshots:
795 if not snapshots:
778 _findsnapshots(revlog, snapshots, good + 1)
796 _findsnapshots(revlog, snapshots, good + 1)
779 previous = None
797 previous = None
780 while good != previous:
798 while good != previous:
781 previous = good
799 previous = good
782 children = tuple(sorted(c for c in snapshots[good]))
800 children = tuple(sorted(c for c in snapshots[good]))
783 good = yield children
801 good = yield children
784
802
785 # we have found nothing
803 # we have found nothing
786 yield None
804 yield None
787
805
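# Illustrative sketch (not part of revlog.py): the "refine snapshot down"
# walk above, on toy data. The real code also walks back up through child
# snapshots and asks the consumer about each step via yield.

_deltaparent = {7: 4, 4: 0, 0: -1}   # toy map: rev -> delta parent (-1 = nullrev)
_good = 7
_previous = None
while _previous != _good:
    _previous = _good
    _base = _deltaparent[_good]
    if _base == -1:
        break
    _good = _base                    # pretend the consumer accepted the base
assert _good == 0                    # the walk went 7 -> 4 -> 0
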
788
806
789 def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
807 def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
790 """Provides group of revision to be tested as delta base
808 """Provides group of revision to be tested as delta base
791
809
792 This lower level function focuses on emitting deltas that are theoretically
810 This lower level function focuses on emitting deltas that are theoretically
793 interesting, without looking at any practical details.
811 interesting, without looking at any practical details.
794
812
795 The group order aims at providing fast or small candidates first.
813 The group order aims at providing fast or small candidates first.
796 """
814 """
797 gdelta = revlog._generaldelta
815 gdelta = revlog._generaldelta
798 # gate sparse behind general-delta because of issue6056
816 # gate sparse behind general-delta because of issue6056
799 sparse = gdelta and revlog._sparserevlog
817 sparse = gdelta and revlog._sparserevlog
800 curr = len(revlog)
818 curr = len(revlog)
801 prev = curr - 1
819 prev = curr - 1
802 deltachain = lambda rev: revlog._deltachain(rev)[0]
820 deltachain = lambda rev: revlog._deltachain(rev)[0]
803
821
804 if gdelta:
822 if gdelta:
805 # exclude already lazy tested base if any
823 # exclude already lazy tested base if any
806 parents = [p for p in (p1, p2) if p != nullrev]
824 parents = [p for p in (p1, p2) if p != nullrev]
807
825
808 if not revlog._deltabothparents and len(parents) == 2:
826 if not revlog._deltabothparents and len(parents) == 2:
809 parents.sort()
827 parents.sort()
810 # To minimize the chance of having to build a fulltext,
828 # To minimize the chance of having to build a fulltext,
811 # pick first whichever parent is closest to us (max rev)
829 # pick first whichever parent is closest to us (max rev)
812 yield (parents[1],)
830 yield (parents[1],)
813 # then the other one (min rev) if the first did not fit
831 # then the other one (min rev) if the first did not fit
814 yield (parents[0],)
832 yield (parents[0],)
815 elif len(parents) > 0:
833 elif len(parents) > 0:
816 # Test all parents (1 or 2), and keep the best candidate
834 # Test all parents (1 or 2), and keep the best candidate
817 yield parents
835 yield parents
818
836
819 if sparse and parents:
837 if sparse and parents:
820 if snapshots is None:
838 if snapshots is None:
821 # map: base-rev: snapshot-rev
839 # map: base-rev: snapshot-rev
822 snapshots = collections.defaultdict(list)
840 snapshots = collections.defaultdict(list)
823 # See if an existing snapshot in the parent chains can be used as
841 # See if an existing snapshot in the parent chains can be used as
824 # a base for a new intermediate-snapshot
842 # a base for a new intermediate-snapshot
825 #
843 #
826 # search for snapshot in parents delta chain
844 # search for snapshot in parents delta chain
827 # map: snapshot-level: snapshot-rev
845 # map: snapshot-level: snapshot-rev
828 parents_snaps = collections.defaultdict(set)
846 parents_snaps = collections.defaultdict(set)
829 candidate_chains = [deltachain(p) for p in parents]
847 candidate_chains = [deltachain(p) for p in parents]
830 for chain in candidate_chains:
848 for chain in candidate_chains:
831 for idx, s in enumerate(chain):
849 for idx, s in enumerate(chain):
832 if not revlog.issnapshot(s):
850 if not revlog.issnapshot(s):
833 break
851 break
834 parents_snaps[idx].add(s)
852 parents_snaps[idx].add(s)
835 snapfloor = min(parents_snaps[0]) + 1
853 snapfloor = min(parents_snaps[0]) + 1
836 _findsnapshots(revlog, snapshots, snapfloor)
854 _findsnapshots(revlog, snapshots, snapfloor)
837 # search for the highest "unrelated" revision
855 # search for the highest "unrelated" revision
838 #
856 #
839 # Adding snapshots used by an "unrelated" revision increases the odds that
857 # Adding snapshots used by an "unrelated" revision increases the odds that
840 # we reuse an independent, yet better, snapshot chain.
858 # we reuse an independent, yet better, snapshot chain.
841 #
859 #
842 # XXX instead of building a set of revisions, we could lazily enumerate
860 # XXX instead of building a set of revisions, we could lazily enumerate
843 # over the chains. That would be more efficient, however we stick to
861 # over the chains. That would be more efficient, however we stick to
844 # simple code for now.
862 # simple code for now.
845 all_revs = set()
863 all_revs = set()
846 for chain in candidate_chains:
864 for chain in candidate_chains:
847 all_revs.update(chain)
865 all_revs.update(chain)
848 other = None
866 other = None
849 for r in revlog.revs(prev, snapfloor):
867 for r in revlog.revs(prev, snapfloor):
850 if r not in all_revs:
868 if r not in all_revs:
851 other = r
869 other = r
852 break
870 break
853 if other is not None:
871 if other is not None:
854 # To avoid unfair competition, we won't use unrelated intermediate
872 # To avoid unfair competition, we won't use unrelated intermediate
855 # snapshot that are deeper than the ones from the parent delta
873 # snapshot that are deeper than the ones from the parent delta
856 # chain.
874 # chain.
857 max_depth = max(parents_snaps.keys())
875 max_depth = max(parents_snaps.keys())
858 chain = deltachain(other)
876 chain = deltachain(other)
859 for idx, s in enumerate(chain):
877 for idx, s in enumerate(chain):
860 if s < snapfloor:
878 if s < snapfloor:
861 continue
879 continue
862 if max_depth < idx:
880 if max_depth < idx:
863 break
881 break
864 if not revlog.issnapshot(s):
882 if not revlog.issnapshot(s):
865 break
883 break
866 parents_snaps[idx].add(s)
884 parents_snaps[idx].add(s)
867 # Test them as possible intermediate snapshot bases
885 # Test them as possible intermediate snapshot bases
868 # We test them from highest to lowest level. High level ones are more
886 # We test them from highest to lowest level. High level ones are more
869 # likely to result in a small delta
887 # likely to result in a small delta
870 floor = None
888 floor = None
871 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
889 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
872 siblings = set()
890 siblings = set()
873 for s in snaps:
891 for s in snaps:
874 siblings.update(snapshots[s])
892 siblings.update(snapshots[s])
875 # Before considering making a new intermediate snapshot, we check
893 # Before considering making a new intermediate snapshot, we check
876 # if an existing snapshot, children of base we consider, would be
894 # if an existing snapshot, children of base we consider, would be
877 # suitable.
895 # suitable.
878 #
896 #
879 # It gives a chance to reuse a delta chain "unrelated" to the
897 # It gives a chance to reuse a delta chain "unrelated" to the
880 # current revision instead of starting our own. Without such
898 # current revision instead of starting our own. Without such
881 # re-use, topological branches would keep reopening new chains,
899 # re-use, topological branches would keep reopening new chains,
882 # creating more and more snapshots as the repository grows.
900 # creating more and more snapshots as the repository grows.
883
901
884 if floor is not None:
902 if floor is not None:
885 # We only do this for siblings created after the one in our
903 # We only do this for siblings created after the one in our
886 # parent's delta chain. Those created before have less chance
904 # parent's delta chain. Those created before have less chance
887 # to be a valid base, since our ancestors had to create a new
905 # to be a valid base, since our ancestors had to create a new
888 # snapshot.
906 # snapshot.
889 siblings = [r for r in siblings if floor < r]
907 siblings = [r for r in siblings if floor < r]
890 yield tuple(sorted(siblings))
908 yield tuple(sorted(siblings))
891 # then test the base from our parent's delta chain.
909 # then test the base from our parent's delta chain.
892 yield tuple(sorted(snaps))
910 yield tuple(sorted(snaps))
893 floor = min(snaps)
911 floor = min(snaps)
894 # No suitable base found in the parent chain, search if any full
912 # No suitable base found in the parent chain, search if any full
895 # snapshots emitted since parent's base would be a suitable base for an
913 # snapshots emitted since parent's base would be a suitable base for an
896 # intermediate snapshot.
914 # intermediate snapshot.
897 #
915 #
898 # It give a chance to reuse a delta chain unrelated to the current
916 # It give a chance to reuse a delta chain unrelated to the current
899 # revisions instead of starting our own. Without such re-use,
917 # revisions instead of starting our own. Without such re-use,
900 # topological branches would keep reopening new full chains. Creating
918 # topological branches would keep reopening new full chains. Creating
901 # more and more snapshot as the repository grow.
919 # more and more snapshot as the repository grow.
902 yield tuple(snapshots[nullrev])
920 yield tuple(snapshots[nullrev])
903
921
904 if not sparse:
922 if not sparse:
905 # other approach failed try against prev to hopefully save us a
923 # other approach failed try against prev to hopefully save us a
906 # fulltext.
924 # fulltext.
907 yield (prev,)
925 yield (prev,)
908
926
909
927
910 class deltacomputer(object):
928 class deltacomputer(object):
911 def __init__(self, revlog):
929 def __init__(self, revlog):
912 self.revlog = revlog
930 self.revlog = revlog
913
931
914 def buildtext(self, revinfo, fh):
932 def buildtext(self, revinfo, fh):
915 """Builds a fulltext version of a revision
933 """Builds a fulltext version of a revision
916
934
917 revinfo: _revisioninfo instance that contains all needed info
935 revinfo: _revisioninfo instance that contains all needed info
918 fh: file handle to either the .i or the .d revlog file,
936 fh: file handle to either the .i or the .d revlog file,
919 depending on whether it is inlined or not
937 depending on whether it is inlined or not
920 """
938 """
921 btext = revinfo.btext
939 btext = revinfo.btext
922 if btext[0] is not None:
940 if btext[0] is not None:
923 return btext[0]
941 return btext[0]
924
942
925 revlog = self.revlog
943 revlog = self.revlog
926 cachedelta = revinfo.cachedelta
944 cachedelta = revinfo.cachedelta
927 baserev = cachedelta[0]
945 baserev = cachedelta[0]
928 delta = cachedelta[1]
946 delta = cachedelta[1]
929
947
930 fulltext = btext[0] = _textfromdelta(
948 fulltext = btext[0] = _textfromdelta(
931 fh,
949 fh,
932 revlog,
950 revlog,
933 baserev,
951 baserev,
934 delta,
952 delta,
935 revinfo.p1,
953 revinfo.p1,
936 revinfo.p2,
954 revinfo.p2,
937 revinfo.flags,
955 revinfo.flags,
938 revinfo.node,
956 revinfo.node,
939 )
957 )
940 return fulltext
958 return fulltext
941
959
942 def _builddeltadiff(self, base, revinfo, fh):
960 def _builddeltadiff(self, base, revinfo, fh):
943 revlog = self.revlog
961 revlog = self.revlog
944 t = self.buildtext(revinfo, fh)
962 t = self.buildtext(revinfo, fh)
945 if revlog.iscensored(base):
963 if revlog.iscensored(base):
946 # deltas based on a censored revision must replace the
964 # deltas based on a censored revision must replace the
947 # full content in one patch, so delta works everywhere
965 # full content in one patch, so delta works everywhere
948 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
966 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
949 delta = header + t
967 delta = header + t
950 else:
968 else:
951 ptext = revlog.rawdata(base, _df=fh)
969 ptext = revlog.rawdata(base, _df=fh)
952 delta = mdiff.textdiff(ptext, t)
970 delta = mdiff.textdiff(ptext, t)
953
971
954 return delta
972 return delta
955
973
956 def _builddeltainfo(self, revinfo, base, fh):
974 def _builddeltainfo(self, revinfo, base, fh):
957 # can we use the cached delta?
975 # can we use the cached delta?
958 revlog = self.revlog
976 revlog = self.revlog
959 chainbase = revlog.chainbase(base)
977 chainbase = revlog.chainbase(base)
960 if revlog._generaldelta:
978 if revlog._generaldelta:
961 deltabase = base
979 deltabase = base
962 else:
980 else:
963 deltabase = chainbase
981 deltabase = chainbase
964 snapshotdepth = None
982 snapshotdepth = None
965 if revlog._sparserevlog and deltabase == nullrev:
983 if revlog._sparserevlog and deltabase == nullrev:
966 snapshotdepth = 0
984 snapshotdepth = 0
967 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
985 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
968 # A delta chain should always be one full snapshot,
986 # A delta chain should always be one full snapshot,
969 # zero or more semi-snapshots, and zero or more deltas
987 # zero or more semi-snapshots, and zero or more deltas
970 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
988 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
971 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
989 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
972 snapshotdepth = len(revlog._deltachain(deltabase)[0])
990 snapshotdepth = len(revlog._deltachain(deltabase)[0])
973 delta = None
991 delta = None
974 if revinfo.cachedelta:
992 if revinfo.cachedelta:
975 cachebase, cachediff = revinfo.cachedelta
993 cachebase, cachediff = revinfo.cachedelta
976 # check if the diff still applies
994 # check if the diff still applies
977 currentbase = cachebase
995 currentbase = cachebase
978 while (
996 while (
979 currentbase != nullrev
997 currentbase != nullrev
980 and currentbase != base
998 and currentbase != base
981 and self.revlog.length(currentbase) == 0
999 and self.revlog.length(currentbase) == 0
982 ):
1000 ):
983 currentbase = self.revlog.deltaparent(currentbase)
1001 currentbase = self.revlog.deltaparent(currentbase)
984 if self.revlog._lazydelta and currentbase == base:
1002 if self.revlog._lazydelta and currentbase == base:
985 delta = revinfo.cachedelta[1]
1003 delta = revinfo.cachedelta[1]
986 if delta is None:
1004 if delta is None:
987 delta = self._builddeltadiff(base, revinfo, fh)
1005 delta = self._builddeltadiff(base, revinfo, fh)
988 # snapshotdepth needs to be neither None nor a 0-level snapshot
1006 # snapshotdepth needs to be neither None nor a 0-level snapshot
989 if revlog.upperboundcomp is not None and snapshotdepth:
1007 if revlog.upperboundcomp is not None and snapshotdepth:
990 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1008 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
991 snapshotlimit = revinfo.textlen >> snapshotdepth
1009 snapshotlimit = revinfo.textlen >> snapshotdepth
992 if snapshotlimit < lowestrealisticdeltalen:
1010 if snapshotlimit < lowestrealisticdeltalen:
993 return None
1011 return None
994 if revlog.length(base) < lowestrealisticdeltalen:
1012 if revlog.length(base) < lowestrealisticdeltalen:
995 return None
1013 return None
996 header, data = revlog.compress(delta)
1014 header, data = revlog.compress(delta)
997 deltalen = len(header) + len(data)
1015 deltalen = len(header) + len(data)
998 offset = revlog.end(len(revlog) - 1)
1016 offset = revlog.end(len(revlog) - 1)
999 dist = deltalen + offset - revlog.start(chainbase)
1017 dist = deltalen + offset - revlog.start(chainbase)
1000 chainlen, compresseddeltalen = revlog._chaininfo(base)
1018 chainlen, compresseddeltalen = revlog._chaininfo(base)
1001 chainlen += 1
1019 chainlen += 1
1002 compresseddeltalen += deltalen
1020 compresseddeltalen += deltalen
1003
1021
1004 return _deltainfo(
1022 return _deltainfo(
1005 dist,
1023 dist,
1006 deltalen,
1024 deltalen,
1007 (header, data),
1025 (header, data),
1008 deltabase,
1026 deltabase,
1009 chainbase,
1027 chainbase,
1010 chainlen,
1028 chainlen,
1011 compresseddeltalen,
1029 compresseddeltalen,
1012 snapshotdepth,
1030 snapshotdepth,
1013 )
1031 )
1014
1032
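# Illustrative sketch (not part of revlog.py): the early-exit arithmetic
# above, on toy numbers. With an assumed best-case compression ratio of 10x,
# a 5000-byte diff cannot realistically compress below 500 bytes; if the
# snapshot budget (textlen >> snapshotdepth) is smaller than that floor,
# compressing the delta would be wasted work.

_upperboundcomp = 10
_textlen, _snapshotdepth = 3000, 3
_lowestrealisticdeltalen = 5000 // _upperboundcomp   # 500
_snapshotlimit = _textlen >> _snapshotdepth          # 3000 // 8 == 375
assert _snapshotlimit < _lowestrealisticdeltalen     # bail out early
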
1015 def _fullsnapshotinfo(self, fh, revinfo):
1033 def _fullsnapshotinfo(self, fh, revinfo):
1016 curr = len(self.revlog)
1034 curr = len(self.revlog)
1017 rawtext = self.buildtext(revinfo, fh)
1035 rawtext = self.buildtext(revinfo, fh)
1018 data = self.revlog.compress(rawtext)
1036 data = self.revlog.compress(rawtext)
1019 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1037 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1020 deltabase = chainbase = curr
1038 deltabase = chainbase = curr
1021 snapshotdepth = 0
1039 snapshotdepth = 0
1022 chainlen = 1
1040 chainlen = 1
1023
1041
1024 return _deltainfo(
1042 return _deltainfo(
1025 dist,
1043 dist,
1026 deltalen,
1044 deltalen,
1027 data,
1045 data,
1028 deltabase,
1046 deltabase,
1029 chainbase,
1047 chainbase,
1030 chainlen,
1048 chainlen,
1031 compresseddeltalen,
1049 compresseddeltalen,
1032 snapshotdepth,
1050 snapshotdepth,
1033 )
1051 )
1034
1052
1035 def finddeltainfo(self, revinfo, fh):
1053 def finddeltainfo(self, revinfo, fh):
1036 """Find an acceptable delta against a candidate revision
1054 """Find an acceptable delta against a candidate revision
1037
1055
1038 revinfo: information about the revision (instance of _revisioninfo)
1056 revinfo: information about the revision (instance of _revisioninfo)
1039 fh: file handle to either the .i or the .d revlog file,
1057 fh: file handle to either the .i or the .d revlog file,
1040 depending on whether it is inlined or not
1058 depending on whether it is inlined or not
1041
1059
1042 Returns the first acceptable candidate revision, as ordered by
1060 Returns the first acceptable candidate revision, as ordered by
1043 _candidategroups
1061 _candidategroups
1044
1062
1045 If no suitable deltabase is found, we return delta info for a full
1063 If no suitable deltabase is found, we return delta info for a full
1046 snapshot.
1064 snapshot.
1047 """
1065 """
1048 if not revinfo.textlen:
1066 if not revinfo.textlen:
1049 return self._fullsnapshotinfo(fh, revinfo)
1067 return self._fullsnapshotinfo(fh, revinfo)
1050
1068
1051 # no delta for flag processor revision (see "candelta" for why)
1069 # no delta for flag processor revision (see "candelta" for why)
1052 # not calling candelta since only one revision needs test, also to
1070 # not calling candelta since only one revision needs test, also to
1053 # avoid overhead fetching flags again.
1071 # avoid overhead fetching flags again.
1054 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1072 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1055 return self._fullsnapshotinfo(fh, revinfo)
1073 return self._fullsnapshotinfo(fh, revinfo)
1056
1074
1057 cachedelta = revinfo.cachedelta
1075 cachedelta = revinfo.cachedelta
1058 p1 = revinfo.p1
1076 p1 = revinfo.p1
1059 p2 = revinfo.p2
1077 p2 = revinfo.p2
1060 revlog = self.revlog
1078 revlog = self.revlog
1061
1079
1062 deltainfo = None
1080 deltainfo = None
1063 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1081 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1064 groups = _candidategroups(
1082 groups = _candidategroups(
1065 self.revlog, revinfo.textlen, p1r, p2r, cachedelta
1083 self.revlog, revinfo.textlen, p1r, p2r, cachedelta
1066 )
1084 )
1067 candidaterevs = next(groups)
1085 candidaterevs = next(groups)
1068 while candidaterevs is not None:
1086 while candidaterevs is not None:
1069 nominateddeltas = []
1087 nominateddeltas = []
1070 if deltainfo is not None:
1088 if deltainfo is not None:
1071 # if we already found a good delta,
1089 # if we already found a good delta,
1072 # challenge it against refined candidates
1090 # challenge it against refined candidates
1073 nominateddeltas.append(deltainfo)
1091 nominateddeltas.append(deltainfo)
1074 for candidaterev in candidaterevs:
1092 for candidaterev in candidaterevs:
1075 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1093 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1076 if candidatedelta is not None:
1094 if candidatedelta is not None:
1077 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1095 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1078 nominateddeltas.append(candidatedelta)
1096 nominateddeltas.append(candidatedelta)
1079 if nominateddeltas:
1097 if nominateddeltas:
1080 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1098 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1081 if deltainfo is not None:
1099 if deltainfo is not None:
1082 candidaterevs = groups.send(deltainfo.base)
1100 candidaterevs = groups.send(deltainfo.base)
1083 else:
1101 else:
1084 candidaterevs = next(groups)
1102 candidaterevs = next(groups)
1085
1103
1086 if deltainfo is None:
1104 if deltainfo is None:
1087 deltainfo = self._fullsnapshotinfo(fh, revinfo)
1105 deltainfo = self._fullsnapshotinfo(fh, revinfo)
1088 return deltainfo
1106 return deltainfo