revlog: do not call Rust code if the index is not compatible with it...
marmoute - r48043:6be2a7ca default
@@ -1,3445 +1,3445 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    revlogv0,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)
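
# Illustrative sketch (not part of upstream revlog.py): how offset_type()
# packs a data-file offset and the per-revision flags into the single
# integer stored as index entry field [0]. The offset value below is made up.
#
#   packed = offset_type(4096, REVIDX_ISCENSORED)
#   assert packed >> 16 == 4096                   # byte offset of the chunk
#   assert packed & 0xFFFF == REVIDX_ISCENSORED   # 16-bit flag field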


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).


    Internal details
    ----------------

    A large part of the revlog logic deals with revisions' "index entries", tuple
    objects that contain the same "items" whatever the revlog version.
    Different versions will have different ways of storing these items (sometimes
    not having them at all), but the tuple will always be the same. New fields
    are usually added at the end to avoid breaking existing code that relies
    on the existing order. The fields are defined as follows:

    [0] offset:
        The byte index of the start of revision data chunk.
        That value is shifted up by 16 bits. Use "offset = field >> 16" to
        retrieve it.

        flags:
            A flag field that carries special information or changes the behavior
            of the revision. (see `REVIDX_*` constants for details)
            The flag field only occupies the first 16 bits of this field,
            use "flags = field & 0xFFFF" to retrieve the value.

    [1] compressed length:
        The size, in bytes, of the chunk on disk

    [2] uncompressed length:
        The size, in bytes, of the full revision once reconstructed.

    [3] base rev:
        Either the base of the revision delta chain (without general
        delta), or the base of the delta (stored in the data chunk)
        with general delta.

    [4] link rev:
        Changelog revision number of the changeset introducing this
        revision.

    [5] parent 1 rev:
        Revision number of the first parent

    [6] parent 2 rev:
        Revision number of the second parent

    [7] node id:
        The node id of the current revision

    [8] sidedata offset:
        The byte index of the start of the revision's side-data chunk.

    [9] sidedata chunk length:
        The size, in bytes, of the revision's side-data chunk.

    [10] data compression mode:
        two bits that detail the way the data chunk is compressed on disk.
        (see "COMP_MODE_*" constants for details). For revlog version 0 and
        1 this will always be COMP_MODE_INLINE.

    [11] side-data compression mode:
        two bits that detail the way the sidedata chunk is compressed on disk.
        (see "COMP_MODE_*" constants for details)
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exist for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must be reliably set by normal
        code, but that test, debug, or performance measurement code might not
        set it to an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    def _init_opts(self):
        """process options (from above/config) to set up the associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap
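
    # Illustrative sketch (not part of upstream revlog.py): the opener options
    # read above come from the repository configuration; a caller wiring a
    # revlog by hand might provide them like this. The concrete values are
    # hypothetical, only the option keys appear in _init_opts() above.
    #
    #   opener.options = {
    #       b'revlogv1': True,
    #       b'generaldelta': True,
    #       b'sparse-revlog': True,
    #       b'chunkcachesize': 65536,
    #   }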

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''

    def _loadindex(self):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        entry_data = b''
        self._initempty = True
        entry_data = self._get_data(entry_point, mmapindexthreshold)
        if len(entry_data) > 0:
            header = INDEX_HEADER.unpack(entry_data[:4])[0]
            self._initempty = False
        else:
            header = new_header

        self._format_flags = header & ~0xFFFF
        self._format_version = header & 0xFFFF

        supported_flags = SUPPORTED_FLAGS.get(self._format_version)
        if supported_flags is None:
            msg = _(b'unknown version (%d) in revlog %s')
            msg %= (self._format_version, self.display_id)
            raise error.RevlogError(msg)
        elif self._format_flags & ~supported_flags:
            msg = _(b'unknown flags (%#04x) in version %d revlog %s')
            display_flag = self._format_flags >> 16
            msg %= (display_flag, self._format_version, self.display_id)
            raise error.RevlogError(msg)

        features = FEATURES_BY_VERSION[self._format_version]
        self._inline = features[b'inline'](self._format_flags)
        self._generaldelta = features[b'generaldelta'](self._format_flags)
        self.hassidedata = features[b'sidedata']

        if not features[b'docket']:
            self._indexfile = entry_point
            index_data = entry_data
        else:
            self._docket_file = entry_point
            if self._initempty:
                self._docket = docketutil.default_docket(self, header)
            else:
                self._docket = docketutil.parse_docket(
                    self, entry_data, use_pending=self._trypending
                )
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}
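
    # Illustrative sketch (not part of upstream revlog.py): how the 32-bit
    # index header parsed in _loadindex() splits into format flags and format
    # version. The header value below is made up for the example.
    #
    #   header = 0x00010001                # FLAG_INLINE_DATA | REVLOGV1
    #   format_version = header & 0xFFFF   # -> 1 (REVLOGV1)
    #   format_flags = header & ~0xFFFF    # -> 0x00010000 (FLAG_INLINE_DATA)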

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        # Maybe we should build a user facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly and use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp
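
    # Illustrative sketch (not part of upstream revlog.py): typical use of the
    # context manager above when reading a raw segment. `start` and `length`
    # are hypothetical values chosen for the example.
    #
    #   with self._datareadfp() as fp:
    #       fp.seek(start)
    #       chunk = fp.read(length)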

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all rev in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap
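
    # Illustrative sketch (not part of upstream revlog.py): the index-based
    # replacements for the deprecated nodemap accessors above, as named in the
    # deprecation messages. `rl` is a revlog instance and `node` a binary
    # node id, both assumptions for the example.
    #
    #   rl.index.has_node(node)   # membership test
    #   rl.index.rev(node)        # node -> rev, raises on unknown nodes
    #   rl.index.get_rev(node)    # node -> rev, returns None on unknown nodes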
824
824
825 def hasnode(self, node):
825 def hasnode(self, node):
826 try:
826 try:
827 self.rev(node)
827 self.rev(node)
828 return True
828 return True
829 except KeyError:
829 except KeyError:
830 return False
830 return False
831
831
832 def candelta(self, baserev, rev):
832 def candelta(self, baserev, rev):
833 """whether two revisions (baserev, rev) can be delta-ed or not"""
833 """whether two revisions (baserev, rev) can be delta-ed or not"""
834 # Disable delta if either rev requires a content-changing flag
834 # Disable delta if either rev requires a content-changing flag
835 # processor (ex. LFS). This is because such flag processor can alter
835 # processor (ex. LFS). This is because such flag processor can alter
836 # the rawtext content that the delta will be based on, and two clients
836 # the rawtext content that the delta will be based on, and two clients
837 # could have a same revlog node with different flags (i.e. different
837 # could have a same revlog node with different flags (i.e. different
838 # rawtext contents) and the delta could be incompatible.
838 # rawtext contents) and the delta could be incompatible.
839 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
839 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
840 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
840 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
841 ):
841 ):
842 return False
842 return False
843 return True
843 return True
844
844
845 def update_caches(self, transaction):
845 def update_caches(self, transaction):
846 if self._nodemap_file is not None:
846 if self._nodemap_file is not None:
847 if transaction is None:
847 if transaction is None:
848 nodemaputil.update_persistent_nodemap(self)
848 nodemaputil.update_persistent_nodemap(self)
849 else:
849 else:
850 nodemaputil.setup_persistent_nodemap(transaction, self)
850 nodemaputil.setup_persistent_nodemap(transaction, self)
851
851
852 def clearcaches(self):
852 def clearcaches(self):
853 self._revisioncache = None
853 self._revisioncache = None
854 self._chainbasecache.clear()
854 self._chainbasecache.clear()
855 self._chunkcache = (0, b'')
855 self._chunkcache = (0, b'')
856 self._pcache = {}
856 self._pcache = {}
857 self._nodemap_docket = None
857 self._nodemap_docket = None
858 self.index.clearcaches()
858 self.index.clearcaches()
859 # The python code is the one responsible for validating the docket, we
859 # The python code is the one responsible for validating the docket, we
860 # end up having to refresh it here.
860 # end up having to refresh it here.
861 use_nodemap = (
861 use_nodemap = (
862 not self._inline
862 not self._inline
863 and self._nodemap_file is not None
863 and self._nodemap_file is not None
864 and util.safehasattr(self.index, 'update_nodemap_data')
864 and util.safehasattr(self.index, 'update_nodemap_data')
865 )
865 )
866 if use_nodemap:
866 if use_nodemap:
867 nodemap_data = nodemaputil.persisted_data(self)
867 nodemap_data = nodemaputil.persisted_data(self)
868 if nodemap_data is not None:
868 if nodemap_data is not None:
869 self._nodemap_docket = nodemap_data[0]
869 self._nodemap_docket = nodemap_data[0]
870 self.index.update_nodemap_data(*nodemap_data)
870 self.index.update_nodemap_data(*nodemap_data)
871
871
872 def rev(self, node):
872 def rev(self, node):
873 try:
873 try:
874 return self.index.rev(node)
874 return self.index.rev(node)
875 except TypeError:
875 except TypeError:
876 raise
876 raise
877 except error.RevlogError:
877 except error.RevlogError:
878 # parsers.c radix tree lookup failed
878 # parsers.c radix tree lookup failed
879 if (
879 if (
880 node == self.nodeconstants.wdirid
880 node == self.nodeconstants.wdirid
881 or node in self.nodeconstants.wdirfilenodeids
881 or node in self.nodeconstants.wdirfilenodeids
882 ):
882 ):
883 raise error.WdirUnsupported
883 raise error.WdirUnsupported
884 raise error.LookupError(node, self.display_id, _(b'no node'))
884 raise error.LookupError(node, self.display_id, _(b'no node'))
885
885
886 # Accessors for index entries.
886 # Accessors for index entries.
887
887
888 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
888 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
889 # are flags.
889 # are flags.
890 def start(self, rev):
890 def start(self, rev):
891 return int(self.index[rev][0] >> 16)
891 return int(self.index[rev][0] >> 16)
892
892
893 def flags(self, rev):
893 def flags(self, rev):
894 return self.index[rev][0] & 0xFFFF
894 return self.index[rev][0] & 0xFFFF
895
895
896 def length(self, rev):
896 def length(self, rev):
897 return self.index[rev][1]
897 return self.index[rev][1]
898
898
899 def sidedata_length(self, rev):
899 def sidedata_length(self, rev):
900 if not self.hassidedata:
900 if not self.hassidedata:
901 return 0
901 return 0
902 return self.index[rev][9]
902 return self.index[rev][9]
903
903
904 def rawsize(self, rev):
904 def rawsize(self, rev):
905 """return the length of the uncompressed text for a given revision"""
905 """return the length of the uncompressed text for a given revision"""
906 l = self.index[rev][2]
906 l = self.index[rev][2]
907 if l >= 0:
907 if l >= 0:
908 return l
908 return l
909
909
910 t = self.rawdata(rev)
910 t = self.rawdata(rev)
911 return len(t)
911 return len(t)
912
912
913 def size(self, rev):
913 def size(self, rev):
914 """length of non-raw text (processed by a "read" flag processor)"""
914 """length of non-raw text (processed by a "read" flag processor)"""
915 # fast path: if no "read" flag processor could change the content,
915 # fast path: if no "read" flag processor could change the content,
916 # size is rawsize. note: ELLIPSIS is known to not change the content.
916 # size is rawsize. note: ELLIPSIS is known to not change the content.
917 flags = self.flags(rev)
917 flags = self.flags(rev)
918 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
918 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
919 return self.rawsize(rev)
919 return self.rawsize(rev)
920
920
921 return len(self.revision(rev, raw=False))
921 return len(self.revision(rev, raw=False))
922
922
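# Editor's note: a small illustration (with made-up flag values) of the fast
# path test in size() above: clear the ELLIPSIS bit out of the set of known
# flags and check whether any remaining known flag is set on the revision.
REVIDX_ELLIPSIS_DEMO = 1 << 2
REVIDX_EXTSTORED_DEMO = 1 << 3
KNOWN_FLAGS_DEMO = REVIDX_ELLIPSIS_DEMO | REVIDX_EXTSTORED_DEMO

def can_use_rawsize(flags):
    # True when only ELLIPSIS (or nothing) is set among the known flags
    return flags & (KNOWN_FLAGS_DEMO ^ REVIDX_ELLIPSIS_DEMO) == 0

assert can_use_rawsize(0)
assert can_use_rawsize(REVIDX_ELLIPSIS_DEMO)
assert not can_use_rawsize(REVIDX_EXTSTORED_DEMO)
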
923 def chainbase(self, rev):
923 def chainbase(self, rev):
924 base = self._chainbasecache.get(rev)
924 base = self._chainbasecache.get(rev)
925 if base is not None:
925 if base is not None:
926 return base
926 return base
927
927
928 index = self.index
928 index = self.index
929 iterrev = rev
929 iterrev = rev
930 base = index[iterrev][3]
930 base = index[iterrev][3]
931 while base != iterrev:
931 while base != iterrev:
932 iterrev = base
932 iterrev = base
933 base = index[iterrev][3]
933 base = index[iterrev][3]
934
934
935 self._chainbasecache[rev] = base
935 self._chainbasecache[rev] = base
936 return base
936 return base
937
937
938 def linkrev(self, rev):
938 def linkrev(self, rev):
939 return self.index[rev][4]
939 return self.index[rev][4]
940
940
941 def parentrevs(self, rev):
941 def parentrevs(self, rev):
942 try:
942 try:
943 entry = self.index[rev]
943 entry = self.index[rev]
944 except IndexError:
944 except IndexError:
945 if rev == wdirrev:
945 if rev == wdirrev:
946 raise error.WdirUnsupported
946 raise error.WdirUnsupported
947 raise
947 raise
948 if entry[5] == nullrev:
948 if entry[5] == nullrev:
949 return entry[6], entry[5]
949 return entry[6], entry[5]
950 else:
950 else:
951 return entry[5], entry[6]
951 return entry[5], entry[6]
952
952
953 # fast parentrevs(rev) where rev isn't filtered
953 # fast parentrevs(rev) where rev isn't filtered
954 _uncheckedparentrevs = parentrevs
954 _uncheckedparentrevs = parentrevs
955
955
956 def node(self, rev):
956 def node(self, rev):
957 try:
957 try:
958 return self.index[rev][7]
958 return self.index[rev][7]
959 except IndexError:
959 except IndexError:
960 if rev == wdirrev:
960 if rev == wdirrev:
961 raise error.WdirUnsupported
961 raise error.WdirUnsupported
962 raise
962 raise
963
963
964 # Derived from index values.
964 # Derived from index values.
965
965
966 def end(self, rev):
966 def end(self, rev):
967 return self.start(rev) + self.length(rev)
967 return self.start(rev) + self.length(rev)
968
968
969 def parents(self, node):
969 def parents(self, node):
970 i = self.index
970 i = self.index
971 d = i[self.rev(node)]
971 d = i[self.rev(node)]
972 # inline node() to avoid function call overhead
972 # inline node() to avoid function call overhead
973 if d[5] == self.nullid:
973 if d[5] == self.nullid:
974 return i[d[6]][7], i[d[5]][7]
974 return i[d[6]][7], i[d[5]][7]
975 else:
975 else:
976 return i[d[5]][7], i[d[6]][7]
976 return i[d[5]][7], i[d[6]][7]
977
977
978 def chainlen(self, rev):
978 def chainlen(self, rev):
979 return self._chaininfo(rev)[0]
979 return self._chaininfo(rev)[0]
980
980
981 def _chaininfo(self, rev):
981 def _chaininfo(self, rev):
982 chaininfocache = self._chaininfocache
982 chaininfocache = self._chaininfocache
983 if rev in chaininfocache:
983 if rev in chaininfocache:
984 return chaininfocache[rev]
984 return chaininfocache[rev]
985 index = self.index
985 index = self.index
986 generaldelta = self._generaldelta
986 generaldelta = self._generaldelta
987 iterrev = rev
987 iterrev = rev
988 e = index[iterrev]
988 e = index[iterrev]
989 clen = 0
989 clen = 0
990 compresseddeltalen = 0
990 compresseddeltalen = 0
991 while iterrev != e[3]:
991 while iterrev != e[3]:
992 clen += 1
992 clen += 1
993 compresseddeltalen += e[1]
993 compresseddeltalen += e[1]
994 if generaldelta:
994 if generaldelta:
995 iterrev = e[3]
995 iterrev = e[3]
996 else:
996 else:
997 iterrev -= 1
997 iterrev -= 1
998 if iterrev in chaininfocache:
998 if iterrev in chaininfocache:
999 t = chaininfocache[iterrev]
999 t = chaininfocache[iterrev]
1000 clen += t[0]
1000 clen += t[0]
1001 compresseddeltalen += t[1]
1001 compresseddeltalen += t[1]
1002 break
1002 break
1003 e = index[iterrev]
1003 e = index[iterrev]
1004 else:
1004 else:
1005 # Add text length of base since decompressing that also takes
1005 # Add text length of base since decompressing that also takes
1006 # work. For cache hits the length is already included.
1006 # work. For cache hits the length is already included.
1007 compresseddeltalen += e[1]
1007 compresseddeltalen += e[1]
1008 r = (clen, compresseddeltalen)
1008 r = (clen, compresseddeltalen)
1009 chaininfocache[rev] = r
1009 chaininfocache[rev] = r
1010 return r
1010 return r
1011
1011
1012 def _deltachain(self, rev, stoprev=None):
1012 def _deltachain(self, rev, stoprev=None):
1013 """Obtain the delta chain for a revision.
1013 """Obtain the delta chain for a revision.
1014
1014
1015 ``stoprev`` specifies a revision to stop at. If not specified, we
1015 ``stoprev`` specifies a revision to stop at. If not specified, we
1016 stop at the base of the chain.
1016 stop at the base of the chain.
1017
1017
1018 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1018 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1019 revs in ascending order and ``stopped`` is a bool indicating whether
1019 revs in ascending order and ``stopped`` is a bool indicating whether
1020 ``stoprev`` was hit.
1020 ``stoprev`` was hit.
1021 """
1021 """
1022 # Try C implementation.
1022 # Try C implementation.
1023 try:
1023 try:
1024 return self.index.deltachain(rev, stoprev, self._generaldelta)
1024 return self.index.deltachain(rev, stoprev, self._generaldelta)
1025 except AttributeError:
1025 except AttributeError:
1026 pass
1026 pass
1027
1027
1028 chain = []
1028 chain = []
1029
1029
1030 # Alias to prevent attribute lookup in tight loop.
1030 # Alias to prevent attribute lookup in tight loop.
1031 index = self.index
1031 index = self.index
1032 generaldelta = self._generaldelta
1032 generaldelta = self._generaldelta
1033
1033
1034 iterrev = rev
1034 iterrev = rev
1035 e = index[iterrev]
1035 e = index[iterrev]
1036 while iterrev != e[3] and iterrev != stoprev:
1036 while iterrev != e[3] and iterrev != stoprev:
1037 chain.append(iterrev)
1037 chain.append(iterrev)
1038 if generaldelta:
1038 if generaldelta:
1039 iterrev = e[3]
1039 iterrev = e[3]
1040 else:
1040 else:
1041 iterrev -= 1
1041 iterrev -= 1
1042 e = index[iterrev]
1042 e = index[iterrev]
1043
1043
1044 if iterrev == stoprev:
1044 if iterrev == stoprev:
1045 stopped = True
1045 stopped = True
1046 else:
1046 else:
1047 chain.append(iterrev)
1047 chain.append(iterrev)
1048 stopped = False
1048 stopped = False
1049
1049
1050 chain.reverse()
1050 chain.reverse()
1051 return chain, stopped
1051 return chain, stopped
1052
1052
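# Editor's note: a self-contained sketch of the delta-chain walk performed by
# _deltachain() above, using a plain list of delta-base revisions instead of a
# real index entry. With general delta the next link is the stored base
# revision; without it, the base is implicitly the previous revision.
def delta_chain(bases, rev, generaldelta=True, stoprev=None):
    chain = []
    iterrev = rev
    base = bases[iterrev]
    while iterrev != base and iterrev != stoprev:
        chain.append(iterrev)
        iterrev = base if generaldelta else iterrev - 1
        base = bases[iterrev]
    stopped = iterrev == stoprev
    if not stopped:
        chain.append(iterrev)
    chain.reverse()
    return chain, stopped

# revs 0 and 1 are full snapshots (base == rev), 2 deltas against 0, 3 against 2
bases = [0, 1, 0, 2]
assert delta_chain(bases, 3) == ([0, 2, 3], False)
assert delta_chain(bases, 3, stoprev=2) == ([3], True)
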
1053 def ancestors(self, revs, stoprev=0, inclusive=False):
1053 def ancestors(self, revs, stoprev=0, inclusive=False):
1054 """Generate the ancestors of 'revs' in reverse revision order.
1054 """Generate the ancestors of 'revs' in reverse revision order.
1055 Does not generate revs lower than stoprev.
1055 Does not generate revs lower than stoprev.
1056
1056
1057 See the documentation for ancestor.lazyancestors for more details."""
1057 See the documentation for ancestor.lazyancestors for more details."""
1058
1058
1059 # first, make sure start revisions aren't filtered
1059 # first, make sure start revisions aren't filtered
1060 revs = list(revs)
1060 revs = list(revs)
1061 checkrev = self.node
1061 checkrev = self.node
1062 for r in revs:
1062 for r in revs:
1063 checkrev(r)
1063 checkrev(r)
1064 # and we're sure ancestors aren't filtered as well
1064 # and we're sure ancestors aren't filtered as well
1065
1065
1066 if rustancestor is not None:
1066 if rustancestor is not None and self.index.rust_ext_compat:
1067 lazyancestors = rustancestor.LazyAncestors
1067 lazyancestors = rustancestor.LazyAncestors
1068 arg = self.index
1068 arg = self.index
1069 else:
1069 else:
1070 lazyancestors = ancestor.lazyancestors
1070 lazyancestors = ancestor.lazyancestors
1071 arg = self._uncheckedparentrevs
1071 arg = self._uncheckedparentrevs
1072 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1072 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1073
1073
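# Editor's note: a hedged sketch of the dispatch pattern used above, which is
# the point of this changeset: the Rust ancestor iterator is only used when
# the Rust module is importable *and* the index object advertises
# compatibility via its rust_ext_compat attribute; otherwise the pure-Python
# walker gets a plain parentrevs(rev) callable. The helper and its arguments
# are placeholders, not part of revlog.py.
def pick_ancestor_impl(index, parentrevs_fn, rust_lazy_ancestors, py_lazy_ancestors):
    rust_ok = rust_lazy_ancestors is not None and getattr(
        index, 'rust_ext_compat', False
    )
    if rust_ok:
        # the Rust iterator consumes the index object directly
        return rust_lazy_ancestors, index
    # the Python iterator only needs a parentrevs(rev) callable
    return py_lazy_ancestors, parentrevs_fn

# with no Rust module available we always fall back to the pure-Python walker
impl, arg = pick_ancestor_impl(object(), lambda r: (-1, -1), None, 'python-impl')
assert impl == 'python-impl'
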
1074 def descendants(self, revs):
1074 def descendants(self, revs):
1075 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1075 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1076
1076
1077 def findcommonmissing(self, common=None, heads=None):
1077 def findcommonmissing(self, common=None, heads=None):
1078 """Return a tuple of the ancestors of common and the ancestors of heads
1078 """Return a tuple of the ancestors of common and the ancestors of heads
1079 that are not ancestors of common. In revset terminology, we return the
1079 that are not ancestors of common. In revset terminology, we return the
1080 tuple:
1080 tuple:
1081
1081
1082 ::common, (::heads) - (::common)
1082 ::common, (::heads) - (::common)
1083
1083
1084 The list is sorted by revision number, meaning it is
1084 The list is sorted by revision number, meaning it is
1085 topologically sorted.
1085 topologically sorted.
1086
1086
1087 'heads' and 'common' are both lists of node IDs. If heads is
1087 'heads' and 'common' are both lists of node IDs. If heads is
1088 not supplied, uses all of the revlog's heads. If common is not
1088 not supplied, uses all of the revlog's heads. If common is not
1089 supplied, uses nullid."""
1089 supplied, uses nullid."""
1090 if common is None:
1090 if common is None:
1091 common = [self.nullid]
1091 common = [self.nullid]
1092 if heads is None:
1092 if heads is None:
1093 heads = self.heads()
1093 heads = self.heads()
1094
1094
1095 common = [self.rev(n) for n in common]
1095 common = [self.rev(n) for n in common]
1096 heads = [self.rev(n) for n in heads]
1096 heads = [self.rev(n) for n in heads]
1097
1097
1098 # we want the ancestors, but inclusive
1098 # we want the ancestors, but inclusive
1099 class lazyset(object):
1099 class lazyset(object):
1100 def __init__(self, lazyvalues):
1100 def __init__(self, lazyvalues):
1101 self.addedvalues = set()
1101 self.addedvalues = set()
1102 self.lazyvalues = lazyvalues
1102 self.lazyvalues = lazyvalues
1103
1103
1104 def __contains__(self, value):
1104 def __contains__(self, value):
1105 return value in self.addedvalues or value in self.lazyvalues
1105 return value in self.addedvalues or value in self.lazyvalues
1106
1106
1107 def __iter__(self):
1107 def __iter__(self):
1108 added = self.addedvalues
1108 added = self.addedvalues
1109 for r in added:
1109 for r in added:
1110 yield r
1110 yield r
1111 for r in self.lazyvalues:
1111 for r in self.lazyvalues:
1112 if r not in added:
1112 if r not in added:
1113 yield r
1113 yield r
1114
1114
1115 def add(self, value):
1115 def add(self, value):
1116 self.addedvalues.add(value)
1116 self.addedvalues.add(value)
1117
1117
1118 def update(self, values):
1118 def update(self, values):
1119 self.addedvalues.update(values)
1119 self.addedvalues.update(values)
1120
1120
1121 has = lazyset(self.ancestors(common))
1121 has = lazyset(self.ancestors(common))
1122 has.add(nullrev)
1122 has.add(nullrev)
1123 has.update(common)
1123 has.update(common)
1124
1124
1125 # take all ancestors from heads that aren't in has
1125 # take all ancestors from heads that aren't in has
1126 missing = set()
1126 missing = set()
1127 visit = collections.deque(r for r in heads if r not in has)
1127 visit = collections.deque(r for r in heads if r not in has)
1128 while visit:
1128 while visit:
1129 r = visit.popleft()
1129 r = visit.popleft()
1130 if r in missing:
1130 if r in missing:
1131 continue
1131 continue
1132 else:
1132 else:
1133 missing.add(r)
1133 missing.add(r)
1134 for p in self.parentrevs(r):
1134 for p in self.parentrevs(r):
1135 if p not in has:
1135 if p not in has:
1136 visit.append(p)
1136 visit.append(p)
1137 missing = list(missing)
1137 missing = list(missing)
1138 missing.sort()
1138 missing.sort()
1139 return has, [self.node(miss) for miss in missing]
1139 return has, [self.node(miss) for miss in missing]
1140
1140
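# Editor's note: a standalone sketch of the "missing" computation done by
# findcommonmissing() above: a breadth-first walk from the heads that stops at
# anything already known to be an ancestor of common. Here parentrevs(rev)
# returns the two parent revisions, with -1 standing in for nullrev, and
# ``has`` already contains nullrev plus the common revisions.
import collections

def missing_revs(parentrevs, has, heads):
    missing = set()
    visit = collections.deque(r for r in heads if r not in has)
    while visit:
        r = visit.popleft()
        if r in missing:
            continue
        missing.add(r)
        for p in parentrevs(r):
            if p not in has:
                visit.append(p)
    return sorted(missing)

# tiny linear history 0 <- 1 <- 2 <- 3 with common ancestors {0, 1}
parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (2, -1)}
assert missing_revs(lambda r: parents[r], {-1, 0, 1}, [3]) == [2, 3]
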
1141 def incrementalmissingrevs(self, common=None):
1141 def incrementalmissingrevs(self, common=None):
1142 """Return an object that can be used to incrementally compute the
1142 """Return an object that can be used to incrementally compute the
1143 revision numbers of the ancestors of arbitrary sets that are not
1143 revision numbers of the ancestors of arbitrary sets that are not
1144 ancestors of common. This is an ancestor.incrementalmissingancestors
1144 ancestors of common. This is an ancestor.incrementalmissingancestors
1145 object.
1145 object.
1146
1146
1147 'common' is a list of revision numbers. If common is not supplied, uses
1147 'common' is a list of revision numbers. If common is not supplied, uses
1148 nullrev.
1148 nullrev.
1149 """
1149 """
1150 if common is None:
1150 if common is None:
1151 common = [nullrev]
1151 common = [nullrev]
1152
1152
1153 if rustancestor is not None:
1153 if rustancestor is not None and self.index.rust_ext_compat:
1154 return rustancestor.MissingAncestors(self.index, common)
1154 return rustancestor.MissingAncestors(self.index, common)
1155 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1155 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1156
1156
1157 def findmissingrevs(self, common=None, heads=None):
1157 def findmissingrevs(self, common=None, heads=None):
1158 """Return the revision numbers of the ancestors of heads that
1158 """Return the revision numbers of the ancestors of heads that
1159 are not ancestors of common.
1159 are not ancestors of common.
1160
1160
1161 More specifically, return a list of revision numbers corresponding to
1161 More specifically, return a list of revision numbers corresponding to
1162 nodes N such that every N satisfies the following constraints:
1162 nodes N such that every N satisfies the following constraints:
1163
1163
1164 1. N is an ancestor of some node in 'heads'
1164 1. N is an ancestor of some node in 'heads'
1165 2. N is not an ancestor of any node in 'common'
1165 2. N is not an ancestor of any node in 'common'
1166
1166
1167 The list is sorted by revision number, meaning it is
1167 The list is sorted by revision number, meaning it is
1168 topologically sorted.
1168 topologically sorted.
1169
1169
1170 'heads' and 'common' are both lists of revision numbers. If heads is
1170 'heads' and 'common' are both lists of revision numbers. If heads is
1171 not supplied, uses all of the revlog's heads. If common is not
1171 not supplied, uses all of the revlog's heads. If common is not
1172 supplied, uses nullid."""
1172 supplied, uses nullid."""
1173 if common is None:
1173 if common is None:
1174 common = [nullrev]
1174 common = [nullrev]
1175 if heads is None:
1175 if heads is None:
1176 heads = self.headrevs()
1176 heads = self.headrevs()
1177
1177
1178 inc = self.incrementalmissingrevs(common=common)
1178 inc = self.incrementalmissingrevs(common=common)
1179 return inc.missingancestors(heads)
1179 return inc.missingancestors(heads)
1180
1180
1181 def findmissing(self, common=None, heads=None):
1181 def findmissing(self, common=None, heads=None):
1182 """Return the ancestors of heads that are not ancestors of common.
1182 """Return the ancestors of heads that are not ancestors of common.
1183
1183
1184 More specifically, return a list of nodes N such that every N
1184 More specifically, return a list of nodes N such that every N
1185 satisfies the following constraints:
1185 satisfies the following constraints:
1186
1186
1187 1. N is an ancestor of some node in 'heads'
1187 1. N is an ancestor of some node in 'heads'
1188 2. N is not an ancestor of any node in 'common'
1188 2. N is not an ancestor of any node in 'common'
1189
1189
1190 The list is sorted by revision number, meaning it is
1190 The list is sorted by revision number, meaning it is
1191 topologically sorted.
1191 topologically sorted.
1192
1192
1193 'heads' and 'common' are both lists of node IDs. If heads is
1193 'heads' and 'common' are both lists of node IDs. If heads is
1194 not supplied, uses all of the revlog's heads. If common is not
1194 not supplied, uses all of the revlog's heads. If common is not
1195 supplied, uses nullid."""
1195 supplied, uses nullid."""
1196 if common is None:
1196 if common is None:
1197 common = [self.nullid]
1197 common = [self.nullid]
1198 if heads is None:
1198 if heads is None:
1199 heads = self.heads()
1199 heads = self.heads()
1200
1200
1201 common = [self.rev(n) for n in common]
1201 common = [self.rev(n) for n in common]
1202 heads = [self.rev(n) for n in heads]
1202 heads = [self.rev(n) for n in heads]
1203
1203
1204 inc = self.incrementalmissingrevs(common=common)
1204 inc = self.incrementalmissingrevs(common=common)
1205 return [self.node(r) for r in inc.missingancestors(heads)]
1205 return [self.node(r) for r in inc.missingancestors(heads)]
1206
1206
1207 def nodesbetween(self, roots=None, heads=None):
1207 def nodesbetween(self, roots=None, heads=None):
1208 """Return a topological path from 'roots' to 'heads'.
1208 """Return a topological path from 'roots' to 'heads'.
1209
1209
1210 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1210 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1211 topologically sorted list of all nodes N that satisfy both of
1211 topologically sorted list of all nodes N that satisfy both of
1212 these constraints:
1212 these constraints:
1213
1213
1214 1. N is a descendant of some node in 'roots'
1214 1. N is a descendant of some node in 'roots'
1215 2. N is an ancestor of some node in 'heads'
1215 2. N is an ancestor of some node in 'heads'
1216
1216
1217 Every node is considered to be both a descendant and an ancestor
1217 Every node is considered to be both a descendant and an ancestor
1218 of itself, so every reachable node in 'roots' and 'heads' will be
1218 of itself, so every reachable node in 'roots' and 'heads' will be
1219 included in 'nodes'.
1219 included in 'nodes'.
1220
1220
1221 'outroots' is the list of reachable nodes in 'roots', i.e., the
1221 'outroots' is the list of reachable nodes in 'roots', i.e., the
1222 subset of 'roots' that is returned in 'nodes'. Likewise,
1222 subset of 'roots' that is returned in 'nodes'. Likewise,
1223 'outheads' is the subset of 'heads' that is also in 'nodes'.
1223 'outheads' is the subset of 'heads' that is also in 'nodes'.
1224
1224
1225 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1225 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1226 unspecified, uses nullid as the only root. If 'heads' is
1226 unspecified, uses nullid as the only root. If 'heads' is
1227 unspecified, uses list of all of the revlog's heads."""
1227 unspecified, uses list of all of the revlog's heads."""
1228 nonodes = ([], [], [])
1228 nonodes = ([], [], [])
1229 if roots is not None:
1229 if roots is not None:
1230 roots = list(roots)
1230 roots = list(roots)
1231 if not roots:
1231 if not roots:
1232 return nonodes
1232 return nonodes
1233 lowestrev = min([self.rev(n) for n in roots])
1233 lowestrev = min([self.rev(n) for n in roots])
1234 else:
1234 else:
1235 roots = [self.nullid] # Everybody's a descendant of nullid
1235 roots = [self.nullid] # Everybody's a descendant of nullid
1236 lowestrev = nullrev
1236 lowestrev = nullrev
1237 if (lowestrev == nullrev) and (heads is None):
1237 if (lowestrev == nullrev) and (heads is None):
1238 # We want _all_ the nodes!
1238 # We want _all_ the nodes!
1239 return (
1239 return (
1240 [self.node(r) for r in self],
1240 [self.node(r) for r in self],
1241 [self.nullid],
1241 [self.nullid],
1242 list(self.heads()),
1242 list(self.heads()),
1243 )
1243 )
1244 if heads is None:
1244 if heads is None:
1245 # All nodes are ancestors, so the latest ancestor is the last
1245 # All nodes are ancestors, so the latest ancestor is the last
1246 # node.
1246 # node.
1247 highestrev = len(self) - 1
1247 highestrev = len(self) - 1
1248 # Set ancestors to None to signal that every node is an ancestor.
1248 # Set ancestors to None to signal that every node is an ancestor.
1249 ancestors = None
1249 ancestors = None
1250 # Set heads to an empty dictionary for later discovery of heads
1250 # Set heads to an empty dictionary for later discovery of heads
1251 heads = {}
1251 heads = {}
1252 else:
1252 else:
1253 heads = list(heads)
1253 heads = list(heads)
1254 if not heads:
1254 if not heads:
1255 return nonodes
1255 return nonodes
1256 ancestors = set()
1256 ancestors = set()
1257 # Turn heads into a dictionary so we can remove 'fake' heads.
1257 # Turn heads into a dictionary so we can remove 'fake' heads.
1258 # Also, later we will be using it to filter out the heads we can't
1258 # Also, later we will be using it to filter out the heads we can't
1259 # find from roots.
1259 # find from roots.
1260 heads = dict.fromkeys(heads, False)
1260 heads = dict.fromkeys(heads, False)
1261 # Start at the top and keep marking parents until we're done.
1261 # Start at the top and keep marking parents until we're done.
1262 nodestotag = set(heads)
1262 nodestotag = set(heads)
1263 # Remember where the top was so we can use it as a limit later.
1263 # Remember where the top was so we can use it as a limit later.
1264 highestrev = max([self.rev(n) for n in nodestotag])
1264 highestrev = max([self.rev(n) for n in nodestotag])
1265 while nodestotag:
1265 while nodestotag:
1266 # grab a node to tag
1266 # grab a node to tag
1267 n = nodestotag.pop()
1267 n = nodestotag.pop()
1268 # Never tag nullid
1268 # Never tag nullid
1269 if n == self.nullid:
1269 if n == self.nullid:
1270 continue
1270 continue
1271 # A node's revision number represents its place in a
1271 # A node's revision number represents its place in a
1272 # topologically sorted list of nodes.
1272 # topologically sorted list of nodes.
1273 r = self.rev(n)
1273 r = self.rev(n)
1274 if r >= lowestrev:
1274 if r >= lowestrev:
1275 if n not in ancestors:
1275 if n not in ancestors:
1276 # If we are possibly a descendant of one of the roots
1276 # If we are possibly a descendant of one of the roots
1277 # and we haven't already been marked as an ancestor
1277 # and we haven't already been marked as an ancestor
1278 ancestors.add(n) # Mark as ancestor
1278 ancestors.add(n) # Mark as ancestor
1279 # Add non-nullid parents to list of nodes to tag.
1279 # Add non-nullid parents to list of nodes to tag.
1280 nodestotag.update(
1280 nodestotag.update(
1281 [p for p in self.parents(n) if p != self.nullid]
1281 [p for p in self.parents(n) if p != self.nullid]
1282 )
1282 )
1283 elif n in heads: # We've seen it before, is it a fake head?
1283 elif n in heads: # We've seen it before, is it a fake head?
1284 # So it is, real heads should not be the ancestors of
1284 # So it is, real heads should not be the ancestors of
1285 # any other heads.
1285 # any other heads.
1286 heads.pop(n)
1286 heads.pop(n)
1287 if not ancestors:
1287 if not ancestors:
1288 return nonodes
1288 return nonodes
1289 # Now that we have our set of ancestors, we want to remove any
1289 # Now that we have our set of ancestors, we want to remove any
1290 # roots that are not ancestors.
1290 # roots that are not ancestors.
1291
1291
1292 # If one of the roots was nullid, everything is included anyway.
1292 # If one of the roots was nullid, everything is included anyway.
1293 if lowestrev > nullrev:
1293 if lowestrev > nullrev:
1294 # But, since we weren't, let's recompute the lowest rev to not
1294 # But, since we weren't, let's recompute the lowest rev to not
1295 # include roots that aren't ancestors.
1295 # include roots that aren't ancestors.
1296
1296
1297 # Filter out roots that aren't ancestors of heads
1297 # Filter out roots that aren't ancestors of heads
1298 roots = [root for root in roots if root in ancestors]
1298 roots = [root for root in roots if root in ancestors]
1299 # Recompute the lowest revision
1299 # Recompute the lowest revision
1300 if roots:
1300 if roots:
1301 lowestrev = min([self.rev(root) for root in roots])
1301 lowestrev = min([self.rev(root) for root in roots])
1302 else:
1302 else:
1303 # No more roots? Return empty list
1303 # No more roots? Return empty list
1304 return nonodes
1304 return nonodes
1305 else:
1305 else:
1306 # We are descending from nullid, and don't need to care about
1306 # We are descending from nullid, and don't need to care about
1307 # any other roots.
1307 # any other roots.
1308 lowestrev = nullrev
1308 lowestrev = nullrev
1309 roots = [self.nullid]
1309 roots = [self.nullid]
1310 # Transform our roots list into a set.
1310 # Transform our roots list into a set.
1311 descendants = set(roots)
1311 descendants = set(roots)
1312 # Also, keep the original roots so we can filter out roots that aren't
1312 # Also, keep the original roots so we can filter out roots that aren't
1313 # 'real' roots (i.e. are descended from other roots).
1313 # 'real' roots (i.e. are descended from other roots).
1314 roots = descendants.copy()
1314 roots = descendants.copy()
1315 # Our topologically sorted list of output nodes.
1315 # Our topologically sorted list of output nodes.
1316 orderedout = []
1316 orderedout = []
1317 # Don't start at nullid since we don't want nullid in our output list,
1317 # Don't start at nullid since we don't want nullid in our output list,
1318 # and if nullid shows up in descendants, empty parents will look like
1318 # and if nullid shows up in descendants, empty parents will look like
1319 # they're descendants.
1319 # they're descendants.
1320 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1320 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1321 n = self.node(r)
1321 n = self.node(r)
1322 isdescendant = False
1322 isdescendant = False
1323 if lowestrev == nullrev: # Everybody is a descendant of nullid
1323 if lowestrev == nullrev: # Everybody is a descendant of nullid
1324 isdescendant = True
1324 isdescendant = True
1325 elif n in descendants:
1325 elif n in descendants:
1326 # n is already a descendant
1326 # n is already a descendant
1327 isdescendant = True
1327 isdescendant = True
1328 # This check only needs to be done here because all the roots
1328 # This check only needs to be done here because all the roots
1329 # will start being marked as descendants before the loop.
1329 # will start being marked as descendants before the loop.
1330 if n in roots:
1330 if n in roots:
1331 # If n was a root, check if it's a 'real' root.
1331 # If n was a root, check if it's a 'real' root.
1332 p = tuple(self.parents(n))
1332 p = tuple(self.parents(n))
1333 # If any of its parents are descendants, it's not a root.
1333 # If any of its parents are descendants, it's not a root.
1334 if (p[0] in descendants) or (p[1] in descendants):
1334 if (p[0] in descendants) or (p[1] in descendants):
1335 roots.remove(n)
1335 roots.remove(n)
1336 else:
1336 else:
1337 p = tuple(self.parents(n))
1337 p = tuple(self.parents(n))
1338 # A node is a descendant if either of its parents are
1338 # A node is a descendant if either of its parents are
1339 # descendants. (We seeded the descendants set with the roots
1339 # descendants. (We seeded the descendants set with the roots
1340 # up there, remember?)
1340 # up there, remember?)
1341 if (p[0] in descendants) or (p[1] in descendants):
1341 if (p[0] in descendants) or (p[1] in descendants):
1342 descendants.add(n)
1342 descendants.add(n)
1343 isdescendant = True
1343 isdescendant = True
1344 if isdescendant and ((ancestors is None) or (n in ancestors)):
1344 if isdescendant and ((ancestors is None) or (n in ancestors)):
1345 # Only include nodes that are both descendants and ancestors.
1345 # Only include nodes that are both descendants and ancestors.
1346 orderedout.append(n)
1346 orderedout.append(n)
1347 if (ancestors is not None) and (n in heads):
1347 if (ancestors is not None) and (n in heads):
1348 # We're trying to figure out which heads are reachable
1348 # We're trying to figure out which heads are reachable
1349 # from roots.
1349 # from roots.
1350 # Mark this head as having been reached
1350 # Mark this head as having been reached
1351 heads[n] = True
1351 heads[n] = True
1352 elif ancestors is None:
1352 elif ancestors is None:
1353 # Otherwise, we're trying to discover the heads.
1353 # Otherwise, we're trying to discover the heads.
1354 # Assume this is a head because if it isn't, the next step
1354 # Assume this is a head because if it isn't, the next step
1355 # will eventually remove it.
1355 # will eventually remove it.
1356 heads[n] = True
1356 heads[n] = True
1357 # But, obviously its parents aren't.
1357 # But, obviously its parents aren't.
1358 for p in self.parents(n):
1358 for p in self.parents(n):
1359 heads.pop(p, None)
1359 heads.pop(p, None)
1360 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1360 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1361 roots = list(roots)
1361 roots = list(roots)
1362 assert orderedout
1362 assert orderedout
1363 assert roots
1363 assert roots
1364 assert heads
1364 assert heads
1365 return (orderedout, roots, heads)
1365 return (orderedout, roots, heads)
1366
1366
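# Editor's note: a much-reduced sketch of what nodesbetween() computes: the
# revisions that are both descendants of some root and ancestors of some head,
# in topological (revision-number) order. It works on revision numbers with a
# parentrevs(rev) callable and ignores the bookkeeping the real method does to
# report which roots and heads survive the filtering.
def revs_between(parentrevs, nrevs, roots, heads):
    # ancestors of the heads, inclusive
    anc = set()
    stack = list(heads)
    while stack:
        r = stack.pop()
        if r in anc or r < 0:
            continue
        anc.add(r)
        stack.extend(parentrevs(r))
    # descendants of the roots, inclusive, walking in increasing rev order
    desc = set(roots)
    for r in range(min(roots), nrevs):
        if r in desc:
            continue
        if any(p in desc for p in parentrevs(r)):
            desc.add(r)
    return sorted(anc & desc)

# 0 <- 1, then 1 <- 2 and 1 <- 3; between root 1 and head 2 lies {1, 2}
parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}
assert revs_between(lambda r: parents[r], 4, [1], [2]) == [1, 2]
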
1367 def headrevs(self, revs=None):
1367 def headrevs(self, revs=None):
1368 if revs is None:
1368 if revs is None:
1369 try:
1369 try:
1370 return self.index.headrevs()
1370 return self.index.headrevs()
1371 except AttributeError:
1371 except AttributeError:
1372 return self._headrevs()
1372 return self._headrevs()
1373 if rustdagop is not None:
1373 if rustdagop is not None and self.index.rust_ext_compat:
1374 return rustdagop.headrevs(self.index, revs)
1374 return rustdagop.headrevs(self.index, revs)
1375 return dagop.headrevs(revs, self._uncheckedparentrevs)
1375 return dagop.headrevs(revs, self._uncheckedparentrevs)
1376
1376
1377 def computephases(self, roots):
1377 def computephases(self, roots):
1378 return self.index.computephasesmapsets(roots)
1378 return self.index.computephasesmapsets(roots)
1379
1379
1380 def _headrevs(self):
1380 def _headrevs(self):
1381 count = len(self)
1381 count = len(self)
1382 if not count:
1382 if not count:
1383 return [nullrev]
1383 return [nullrev]
1384 # we won't iterate over filtered revs, so nobody is a head at start
1384 # we won't iterate over filtered revs, so nobody is a head at start
1385 ishead = [0] * (count + 1)
1385 ishead = [0] * (count + 1)
1386 index = self.index
1386 index = self.index
1387 for r in self:
1387 for r in self:
1388 ishead[r] = 1 # I may be a head
1388 ishead[r] = 1 # I may be a head
1389 e = index[r]
1389 e = index[r]
1390 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1390 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1391 return [r for r, val in enumerate(ishead) if val]
1391 return [r for r, val in enumerate(ishead) if val]
1392
1392
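# Editor's note: a standalone version of the pure-Python head computation in
# _headrevs() above: every revision starts out as a potential head and is
# knocked out as soon as it shows up as somebody's parent. (The real code
# sizes its list count + 1 so that writes through nullrev land harmlessly in
# the extra slot; this sketch simply skips null parents instead.)
def head_revs(parentrevs, nrevs):
    if not nrevs:
        return [-1]  # nullrev is the only "head" of an empty revlog
    ishead = [True] * nrevs
    for r in range(nrevs):
        for p in parentrevs(r):
            if p >= 0:
                ishead[p] = False
    return [r for r in range(nrevs) if ishead[r]]

# 0 <- 1 <- 2 and 0 <- 3: the heads are 2 and 3
parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (0, -1)}
assert head_revs(lambda r: parents[r], 4) == [2, 3]
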
1393 def heads(self, start=None, stop=None):
1393 def heads(self, start=None, stop=None):
1394 """return the list of all nodes that have no children
1394 """return the list of all nodes that have no children
1395
1395
1396 if start is specified, only heads that are descendants of
1396 if start is specified, only heads that are descendants of
1397 start will be returned
1397 start will be returned
1398 if stop is specified, it will consider all the revs from stop
1398 if stop is specified, it will consider all the revs from stop
1399 as if they had no children
1399 as if they had no children
1400 """
1400 """
1401 if start is None and stop is None:
1401 if start is None and stop is None:
1402 if not len(self):
1402 if not len(self):
1403 return [self.nullid]
1403 return [self.nullid]
1404 return [self.node(r) for r in self.headrevs()]
1404 return [self.node(r) for r in self.headrevs()]
1405
1405
1406 if start is None:
1406 if start is None:
1407 start = nullrev
1407 start = nullrev
1408 else:
1408 else:
1409 start = self.rev(start)
1409 start = self.rev(start)
1410
1410
1411 stoprevs = {self.rev(n) for n in stop or []}
1411 stoprevs = {self.rev(n) for n in stop or []}
1412
1412
1413 revs = dagop.headrevssubset(
1413 revs = dagop.headrevssubset(
1414 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1414 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1415 )
1415 )
1416
1416
1417 return [self.node(rev) for rev in revs]
1417 return [self.node(rev) for rev in revs]
1418
1418
1419 def children(self, node):
1419 def children(self, node):
1420 """find the children of a given node"""
1420 """find the children of a given node"""
1421 c = []
1421 c = []
1422 p = self.rev(node)
1422 p = self.rev(node)
1423 for r in self.revs(start=p + 1):
1423 for r in self.revs(start=p + 1):
1424 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1424 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1425 if prevs:
1425 if prevs:
1426 for pr in prevs:
1426 for pr in prevs:
1427 if pr == p:
1427 if pr == p:
1428 c.append(self.node(r))
1428 c.append(self.node(r))
1429 elif p == nullrev:
1429 elif p == nullrev:
1430 c.append(self.node(r))
1430 c.append(self.node(r))
1431 return c
1431 return c
1432
1432
1433 def commonancestorsheads(self, a, b):
1433 def commonancestorsheads(self, a, b):
1434 """calculate all the heads of the common ancestors of nodes a and b"""
1434 """calculate all the heads of the common ancestors of nodes a and b"""
1435 a, b = self.rev(a), self.rev(b)
1435 a, b = self.rev(a), self.rev(b)
1436 ancs = self._commonancestorsheads(a, b)
1436 ancs = self._commonancestorsheads(a, b)
1437 return pycompat.maplist(self.node, ancs)
1437 return pycompat.maplist(self.node, ancs)
1438
1438
1439 def _commonancestorsheads(self, *revs):
1439 def _commonancestorsheads(self, *revs):
1440 """calculate all the heads of the common ancestors of revs"""
1440 """calculate all the heads of the common ancestors of revs"""
1441 try:
1441 try:
1442 ancs = self.index.commonancestorsheads(*revs)
1442 ancs = self.index.commonancestorsheads(*revs)
1443 except (AttributeError, OverflowError): # C implementation failed
1443 except (AttributeError, OverflowError): # C implementation failed
1444 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1444 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1445 return ancs
1445 return ancs
1446
1446
1447 def isancestor(self, a, b):
1447 def isancestor(self, a, b):
1448 """return True if node a is an ancestor of node b
1448 """return True if node a is an ancestor of node b
1449
1449
1450 A revision is considered an ancestor of itself."""
1450 A revision is considered an ancestor of itself."""
1451 a, b = self.rev(a), self.rev(b)
1451 a, b = self.rev(a), self.rev(b)
1452 return self.isancestorrev(a, b)
1452 return self.isancestorrev(a, b)
1453
1453
1454 def isancestorrev(self, a, b):
1454 def isancestorrev(self, a, b):
1455 """return True if revision a is an ancestor of revision b
1455 """return True if revision a is an ancestor of revision b
1456
1456
1457 A revision is considered an ancestor of itself.
1457 A revision is considered an ancestor of itself.
1458
1458
1459 The implementation of this is trivial but the use of
1459 The implementation of this is trivial but the use of
1460 reachableroots is not."""
1460 reachableroots is not."""
1461 if a == nullrev:
1461 if a == nullrev:
1462 return True
1462 return True
1463 elif a == b:
1463 elif a == b:
1464 return True
1464 return True
1465 elif a > b:
1465 elif a > b:
1466 return False
1466 return False
1467 return bool(self.reachableroots(a, [b], [a], includepath=False))
1467 return bool(self.reachableroots(a, [b], [a], includepath=False))
1468
1468
1469 def reachableroots(self, minroot, heads, roots, includepath=False):
1469 def reachableroots(self, minroot, heads, roots, includepath=False):
1470 """return (heads(::(<roots> and <roots>::<heads>)))
1470 """return (heads(::(<roots> and <roots>::<heads>)))
1471
1471
1472 If includepath is True, return (<roots>::<heads>)."""
1472 If includepath is True, return (<roots>::<heads>)."""
1473 try:
1473 try:
1474 return self.index.reachableroots2(
1474 return self.index.reachableroots2(
1475 minroot, heads, roots, includepath
1475 minroot, heads, roots, includepath
1476 )
1476 )
1477 except AttributeError:
1477 except AttributeError:
1478 return dagop._reachablerootspure(
1478 return dagop._reachablerootspure(
1479 self.parentrevs, minroot, roots, heads, includepath
1479 self.parentrevs, minroot, roots, heads, includepath
1480 )
1480 )
1481
1481
1482 def ancestor(self, a, b):
1482 def ancestor(self, a, b):
1483 """calculate the "best" common ancestor of nodes a and b"""
1483 """calculate the "best" common ancestor of nodes a and b"""
1484
1484
1485 a, b = self.rev(a), self.rev(b)
1485 a, b = self.rev(a), self.rev(b)
1486 try:
1486 try:
1487 ancs = self.index.ancestors(a, b)
1487 ancs = self.index.ancestors(a, b)
1488 except (AttributeError, OverflowError):
1488 except (AttributeError, OverflowError):
1489 ancs = ancestor.ancestors(self.parentrevs, a, b)
1489 ancs = ancestor.ancestors(self.parentrevs, a, b)
1490 if ancs:
1490 if ancs:
1491 # choose a consistent winner when there's a tie
1491 # choose a consistent winner when there's a tie
1492 return min(map(self.node, ancs))
1492 return min(map(self.node, ancs))
1493 return self.nullid
1493 return self.nullid
1494
1494
1495 def _match(self, id):
1495 def _match(self, id):
1496 if isinstance(id, int):
1496 if isinstance(id, int):
1497 # rev
1497 # rev
1498 return self.node(id)
1498 return self.node(id)
1499 if len(id) == self.nodeconstants.nodelen:
1499 if len(id) == self.nodeconstants.nodelen:
1500 # possibly a binary node
1500 # possibly a binary node
1501 # odds of a binary node being all hex in ASCII are 1 in 10**25
1501 # odds of a binary node being all hex in ASCII are 1 in 10**25
1502 try:
1502 try:
1503 node = id
1503 node = id
1504 self.rev(node) # quick search the index
1504 self.rev(node) # quick search the index
1505 return node
1505 return node
1506 except error.LookupError:
1506 except error.LookupError:
1507 pass # may be partial hex id
1507 pass # may be partial hex id
1508 try:
1508 try:
1509 # str(rev)
1509 # str(rev)
1510 rev = int(id)
1510 rev = int(id)
1511 if b"%d" % rev != id:
1511 if b"%d" % rev != id:
1512 raise ValueError
1512 raise ValueError
1513 if rev < 0:
1513 if rev < 0:
1514 rev = len(self) + rev
1514 rev = len(self) + rev
1515 if rev < 0 or rev >= len(self):
1515 if rev < 0 or rev >= len(self):
1516 raise ValueError
1516 raise ValueError
1517 return self.node(rev)
1517 return self.node(rev)
1518 except (ValueError, OverflowError):
1518 except (ValueError, OverflowError):
1519 pass
1519 pass
1520 if len(id) == 2 * self.nodeconstants.nodelen:
1520 if len(id) == 2 * self.nodeconstants.nodelen:
1521 try:
1521 try:
1522 # a full hex nodeid?
1522 # a full hex nodeid?
1523 node = bin(id)
1523 node = bin(id)
1524 self.rev(node)
1524 self.rev(node)
1525 return node
1525 return node
1526 except (TypeError, error.LookupError):
1526 except (TypeError, error.LookupError):
1527 pass
1527 pass
1528
1528
1529 def _partialmatch(self, id):
1529 def _partialmatch(self, id):
1530 # we don't care about wdirfilenodeids as they should always be full hashes
1530 # we don't care about wdirfilenodeids as they should always be full hashes
1531 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1531 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1532 try:
1532 try:
1533 partial = self.index.partialmatch(id)
1533 partial = self.index.partialmatch(id)
1534 if partial and self.hasnode(partial):
1534 if partial and self.hasnode(partial):
1535 if maybewdir:
1535 if maybewdir:
1536 # single 'ff...' match in radix tree, ambiguous with wdir
1536 # single 'ff...' match in radix tree, ambiguous with wdir
1537 raise error.RevlogError
1537 raise error.RevlogError
1538 return partial
1538 return partial
1539 if maybewdir:
1539 if maybewdir:
1540 # no 'ff...' match in radix tree, wdir identified
1540 # no 'ff...' match in radix tree, wdir identified
1541 raise error.WdirUnsupported
1541 raise error.WdirUnsupported
1542 return None
1542 return None
1543 except error.RevlogError:
1543 except error.RevlogError:
1544 # parsers.c radix tree lookup gave multiple matches
1544 # parsers.c radix tree lookup gave multiple matches
1545 # fast path: for unfiltered changelog, radix tree is accurate
1545 # fast path: for unfiltered changelog, radix tree is accurate
1546 if not getattr(self, 'filteredrevs', None):
1546 if not getattr(self, 'filteredrevs', None):
1547 raise error.AmbiguousPrefixLookupError(
1547 raise error.AmbiguousPrefixLookupError(
1548 id, self.display_id, _(b'ambiguous identifier')
1548 id, self.display_id, _(b'ambiguous identifier')
1549 )
1549 )
1550 # fall through to slow path that filters hidden revisions
1550 # fall through to slow path that filters hidden revisions
1551 except (AttributeError, ValueError):
1551 except (AttributeError, ValueError):
1552 # we are pure python, or key was too short to search radix tree
1552 # we are pure python, or key was too short to search radix tree
1553 pass
1553 pass
1554
1554
1555 if id in self._pcache:
1555 if id in self._pcache:
1556 return self._pcache[id]
1556 return self._pcache[id]
1557
1557
1558 if len(id) <= 40:
1558 if len(id) <= 40:
1559 try:
1559 try:
1560 # hex(node)[:...]
1560 # hex(node)[:...]
1561 l = len(id) // 2 # grab an even number of digits
1561 l = len(id) // 2 # grab an even number of digits
1562 prefix = bin(id[: l * 2])
1562 prefix = bin(id[: l * 2])
1563 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1563 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1564 nl = [
1564 nl = [
1565 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1565 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1566 ]
1566 ]
1567 if self.nodeconstants.nullhex.startswith(id):
1567 if self.nodeconstants.nullhex.startswith(id):
1568 nl.append(self.nullid)
1568 nl.append(self.nullid)
1569 if len(nl) > 0:
1569 if len(nl) > 0:
1570 if len(nl) == 1 and not maybewdir:
1570 if len(nl) == 1 and not maybewdir:
1571 self._pcache[id] = nl[0]
1571 self._pcache[id] = nl[0]
1572 return nl[0]
1572 return nl[0]
1573 raise error.AmbiguousPrefixLookupError(
1573 raise error.AmbiguousPrefixLookupError(
1574 id, self.display_id, _(b'ambiguous identifier')
1574 id, self.display_id, _(b'ambiguous identifier')
1575 )
1575 )
1576 if maybewdir:
1576 if maybewdir:
1577 raise error.WdirUnsupported
1577 raise error.WdirUnsupported
1578 return None
1578 return None
1579 except TypeError:
1579 except TypeError:
1580 pass
1580 pass
1581
1581
1582 def lookup(self, id):
1582 def lookup(self, id):
1583 """locate a node based on:
1583 """locate a node based on:
1584 - revision number or str(revision number)
1584 - revision number or str(revision number)
1585 - nodeid or subset of hex nodeid
1585 - nodeid or subset of hex nodeid
1586 """
1586 """
1587 n = self._match(id)
1587 n = self._match(id)
1588 if n is not None:
1588 if n is not None:
1589 return n
1589 return n
1590 n = self._partialmatch(id)
1590 n = self._partialmatch(id)
1591 if n:
1591 if n:
1592 return n
1592 return n
1593
1593
1594 raise error.LookupError(id, self.display_id, _(b'no match found'))
1594 raise error.LookupError(id, self.display_id, _(b'no match found'))
1595
1595
1596 def shortest(self, node, minlength=1):
1596 def shortest(self, node, minlength=1):
1597 """Find the shortest unambiguous prefix that matches node."""
1597 """Find the shortest unambiguous prefix that matches node."""
1598
1598
1599 def isvalid(prefix):
1599 def isvalid(prefix):
1600 try:
1600 try:
1601 matchednode = self._partialmatch(prefix)
1601 matchednode = self._partialmatch(prefix)
1602 except error.AmbiguousPrefixLookupError:
1602 except error.AmbiguousPrefixLookupError:
1603 return False
1603 return False
1604 except error.WdirUnsupported:
1604 except error.WdirUnsupported:
1605 # single 'ff...' match
1605 # single 'ff...' match
1606 return True
1606 return True
1607 if matchednode is None:
1607 if matchednode is None:
1608 raise error.LookupError(node, self.display_id, _(b'no node'))
1608 raise error.LookupError(node, self.display_id, _(b'no node'))
1609 return True
1609 return True
1610
1610
1611 def maybewdir(prefix):
1611 def maybewdir(prefix):
1612 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1612 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1613
1613
1614 hexnode = hex(node)
1614 hexnode = hex(node)
1615
1615
1616 def disambiguate(hexnode, minlength):
1616 def disambiguate(hexnode, minlength):
1617 """Disambiguate against wdirid."""
1617 """Disambiguate against wdirid."""
1618 for length in range(minlength, len(hexnode) + 1):
1618 for length in range(minlength, len(hexnode) + 1):
1619 prefix = hexnode[:length]
1619 prefix = hexnode[:length]
1620 if not maybewdir(prefix):
1620 if not maybewdir(prefix):
1621 return prefix
1621 return prefix
1622
1622
1623 if not getattr(self, 'filteredrevs', None):
1623 if not getattr(self, 'filteredrevs', None):
1624 try:
1624 try:
1625 length = max(self.index.shortest(node), minlength)
1625 length = max(self.index.shortest(node), minlength)
1626 return disambiguate(hexnode, length)
1626 return disambiguate(hexnode, length)
1627 except error.RevlogError:
1627 except error.RevlogError:
1628 if node != self.nodeconstants.wdirid:
1628 if node != self.nodeconstants.wdirid:
1629 raise error.LookupError(
1629 raise error.LookupError(
1630 node, self.display_id, _(b'no node')
1630 node, self.display_id, _(b'no node')
1631 )
1631 )
1632 except AttributeError:
1632 except AttributeError:
1633 # Fall through to pure code
1633 # Fall through to pure code
1634 pass
1634 pass
1635
1635
1636 if node == self.nodeconstants.wdirid:
1636 if node == self.nodeconstants.wdirid:
1637 for length in range(minlength, len(hexnode) + 1):
1637 for length in range(minlength, len(hexnode) + 1):
1638 prefix = hexnode[:length]
1638 prefix = hexnode[:length]
1639 if isvalid(prefix):
1639 if isvalid(prefix):
1640 return prefix
1640 return prefix
1641
1641
1642 for length in range(minlength, len(hexnode) + 1):
1642 for length in range(minlength, len(hexnode) + 1):
1643 prefix = hexnode[:length]
1643 prefix = hexnode[:length]
1644 if isvalid(prefix):
1644 if isvalid(prefix):
1645 return disambiguate(hexnode, length)
1645 return disambiguate(hexnode, length)
1646
1646
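# Editor's note: a toy version of the prefix-disambiguation idea behind
# shortest() and _partialmatch() above, operating on a plain set of hex node
# ids instead of the index's radix tree, and ignoring the working-directory
# ('fff...') and hidden-revision special cases handled by the real code.
def shortest_prefix(all_hex_nodes, hexnode, minlength=1):
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        matches = [n for n in all_hex_nodes if n.startswith(prefix)]
        if matches == [hexnode]:
            return prefix
    return hexnode

nodes = {'1a2b3c', '1a9f00', 'ff00aa'}
assert shortest_prefix(nodes, '1a2b3c') == '1a2'
assert shortest_prefix(nodes, 'ff00aa') == 'f'
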
1647 def cmp(self, node, text):
1647 def cmp(self, node, text):
1648 """compare text with a given file revision
1648 """compare text with a given file revision
1649
1649
1650 returns True if text is different than what is stored.
1650 returns True if text is different than what is stored.
1651 """
1651 """
1652 p1, p2 = self.parents(node)
1652 p1, p2 = self.parents(node)
1653 return storageutil.hashrevisionsha1(text, p1, p2) != node
1653 return storageutil.hashrevisionsha1(text, p1, p2) != node
1654
1654
1655 def _cachesegment(self, offset, data):
1655 def _cachesegment(self, offset, data):
1656 """Add a segment to the revlog cache.
1656 """Add a segment to the revlog cache.
1657
1657
1658 Accepts an absolute offset and the data that is at that location.
1658 Accepts an absolute offset and the data that is at that location.
1659 """
1659 """
1660 o, d = self._chunkcache
1660 o, d = self._chunkcache
1661 # try to add to existing cache
1661 # try to add to existing cache
1662 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1662 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1663 self._chunkcache = o, d + data
1663 self._chunkcache = o, d + data
1664 else:
1664 else:
1665 self._chunkcache = offset, data
1665 self._chunkcache = offset, data
1666
1666
1667 def _readsegment(self, offset, length, df=None):
1667 def _readsegment(self, offset, length, df=None):
1668 """Load a segment of raw data from the revlog.
1668 """Load a segment of raw data from the revlog.
1669
1669
1670 Accepts an absolute offset, length to read, and an optional existing
1670 Accepts an absolute offset, length to read, and an optional existing
1671 file handle to read from.
1671 file handle to read from.
1672
1672
1673 If an existing file handle is passed, it will be seeked and the
1673 If an existing file handle is passed, it will be seeked and the
1674 original seek position will NOT be restored.
1674 original seek position will NOT be restored.
1675
1675
1676 Returns a str or buffer of raw byte data.
1676 Returns a str or buffer of raw byte data.
1677
1677
1678 Raises if the requested number of bytes could not be read.
1678 Raises if the requested number of bytes could not be read.
1679 """
1679 """
1680 # Cache data both forward and backward around the requested
1680 # Cache data both forward and backward around the requested
1681 # data, in a fixed size window. This helps speed up operations
1681 # data, in a fixed size window. This helps speed up operations
1682 # involving reading the revlog backwards.
1682 # involving reading the revlog backwards.
1683 cachesize = self._chunkcachesize
1683 cachesize = self._chunkcachesize
1684 realoffset = offset & ~(cachesize - 1)
1684 realoffset = offset & ~(cachesize - 1)
1685 reallength = (
1685 reallength = (
1686 (offset + length + cachesize) & ~(cachesize - 1)
1686 (offset + length + cachesize) & ~(cachesize - 1)
1687 ) - realoffset
1687 ) - realoffset
1688 with self._datareadfp(df) as df:
1688 with self._datareadfp(df) as df:
1689 df.seek(realoffset)
1689 df.seek(realoffset)
1690 d = df.read(reallength)
1690 d = df.read(reallength)
1691
1691
1692 self._cachesegment(realoffset, d)
1692 self._cachesegment(realoffset, d)
1693 if offset != realoffset or reallength != length:
1693 if offset != realoffset or reallength != length:
1694 startoffset = offset - realoffset
1694 startoffset = offset - realoffset
1695 if len(d) - startoffset < length:
1695 if len(d) - startoffset < length:
1696 raise error.RevlogError(
1696 raise error.RevlogError(
1697 _(
1697 _(
1698 b'partial read of revlog %s; expected %d bytes from '
1698 b'partial read of revlog %s; expected %d bytes from '
1699 b'offset %d, got %d'
1699 b'offset %d, got %d'
1700 )
1700 )
1701 % (
1701 % (
1702 self._indexfile if self._inline else self._datafile,
1702 self._indexfile if self._inline else self._datafile,
1703 length,
1703 length,
1704 offset,
1704 offset,
1705 len(d) - startoffset,
1705 len(d) - startoffset,
1706 )
1706 )
1707 )
1707 )
1708
1708
1709 return util.buffer(d, startoffset, length)
1709 return util.buffer(d, startoffset, length)
1710
1710
1711 if len(d) < length:
1711 if len(d) < length:
1712 raise error.RevlogError(
1712 raise error.RevlogError(
1713 _(
1713 _(
1714 b'partial read of revlog %s; expected %d bytes from offset '
1714 b'partial read of revlog %s; expected %d bytes from offset '
1715 b'%d, got %d'
1715 b'%d, got %d'
1716 )
1716 )
1717 % (
1717 % (
1718 self._indexfile if self._inline else self._datafile,
1718 self._indexfile if self._inline else self._datafile,
1719 length,
1719 length,
1720 offset,
1720 offset,
1721 len(d),
1721 len(d),
1722 )
1722 )
1723 )
1723 )
1724
1724
1725 return d
1725 return d
1726
1726
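# Editor's note: a small demonstration of the window-alignment arithmetic used
# by _readsegment() above: reads are widened to chunk-cache-size boundaries
# (the cache size must be a power of two) so that neighbouring requests can be
# served from the cached segment.
def aligned_window(offset, length, cachesize):
    realoffset = offset & ~(cachesize - 1)
    reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
    return realoffset, reallength

# a 10-byte read at offset 100 with a 64-byte cache granularity
realoffset, reallength = aligned_window(100, 10, 64)
assert (realoffset, reallength) == (64, 64)
# the widened window always covers the requested range
assert realoffset <= 100 and realoffset + reallength >= 100 + 10
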
1727 def _getsegment(self, offset, length, df=None):
1727 def _getsegment(self, offset, length, df=None):
1728 """Obtain a segment of raw data from the revlog.
1728 """Obtain a segment of raw data from the revlog.
1729
1729
1730 Accepts an absolute offset, length of bytes to obtain, and an
1730 Accepts an absolute offset, length of bytes to obtain, and an
1731 optional file handle to the already-opened revlog. If the file
1731 optional file handle to the already-opened revlog. If the file
1732 handle is used, its original seek position will not be preserved.
1732 handle is used, its original seek position will not be preserved.
1733
1733
1734 Requests for data may be returned from a cache.
1734 Requests for data may be returned from a cache.
1735
1735
1736 Returns a str or a buffer instance of raw byte data.
1736 Returns a str or a buffer instance of raw byte data.
1737 """
1737 """
1738 o, d = self._chunkcache
1738 o, d = self._chunkcache
1739 l = len(d)
1739 l = len(d)
1740
1740
1741 # is it in the cache?
1741 # is it in the cache?
1742 cachestart = offset - o
1742 cachestart = offset - o
1743 cacheend = cachestart + length
1743 cacheend = cachestart + length
1744 if cachestart >= 0 and cacheend <= l:
1744 if cachestart >= 0 and cacheend <= l:
1745 if cachestart == 0 and cacheend == l:
1745 if cachestart == 0 and cacheend == l:
1746 return d # avoid a copy
1746 return d # avoid a copy
1747 return util.buffer(d, cachestart, cacheend - cachestart)
1747 return util.buffer(d, cachestart, cacheend - cachestart)
1748
1748
1749 return self._readsegment(offset, length, df=df)
1749 return self._readsegment(offset, length, df=df)
1750
1750
1751 def _getsegmentforrevs(self, startrev, endrev, df=None):
1751 def _getsegmentforrevs(self, startrev, endrev, df=None):
1752 """Obtain a segment of raw data corresponding to a range of revisions.
1752 """Obtain a segment of raw data corresponding to a range of revisions.
1753
1753
1754 Accepts the start and end revisions and an optional already-open
1754 Accepts the start and end revisions and an optional already-open
1755 file handle to be used for reading. If the file handle is used, its
1755 file handle to be used for reading. If the file handle is used, its
1756 seek position will not be preserved.
1756 seek position will not be preserved.
1757
1757
1758 Requests for data may be satisfied by a cache.
1758 Requests for data may be satisfied by a cache.
1759
1759
1760 Returns a 2-tuple of (offset, data) for the requested range of
1760 Returns a 2-tuple of (offset, data) for the requested range of
1761 revisions. Offset is the integer offset from the beginning of the
1761 revisions. Offset is the integer offset from the beginning of the
1762 revlog and data is a str or buffer of the raw byte data.
1762 revlog and data is a str or buffer of the raw byte data.
1763
1763
1764 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1764 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1765 to determine where each revision's data begins and ends.
1765 to determine where each revision's data begins and ends.
1766 """
1766 """
1767 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1767 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1768 # (functions are expensive).
1768 # (functions are expensive).
1769 index = self.index
1769 index = self.index
1770 istart = index[startrev]
1770 istart = index[startrev]
1771 start = int(istart[0] >> 16)
1771 start = int(istart[0] >> 16)
1772 if startrev == endrev:
1772 if startrev == endrev:
1773 end = start + istart[1]
1773 end = start + istart[1]
1774 else:
1774 else:
1775 iend = index[endrev]
1775 iend = index[endrev]
1776 end = int(iend[0] >> 16) + iend[1]
1776 end = int(iend[0] >> 16) + iend[1]
1777
1777
1778 if self._inline:
1778 if self._inline:
1779 start += (startrev + 1) * self.index.entry_size
1779 start += (startrev + 1) * self.index.entry_size
1780 end += (endrev + 1) * self.index.entry_size
1780 end += (endrev + 1) * self.index.entry_size
1781 length = end - start
1781 length = end - start
1782
1782
1783 return start, self._getsegment(start, length, df=df)
1783 return start, self._getsegment(start, length, df=df)
1784
1784
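# Editor's note: a sketch of the offset arithmetic in _getsegmentforrevs()
# above. In an inline revlog the index entries and the revision data are
# interleaved in a single file, so a revision's physical position is its
# logical data offset plus the size of every index entry written up to and
# including its own.
def physical_range(logical_start, logical_end, startrev, endrev, inline,
                   entry_size):
    start, end = logical_start, logical_end
    if inline:
        start += (startrev + 1) * entry_size
        end += (endrev + 1) * entry_size
    return start, end - start

# rev 0 of an inline revlog with 64-byte index entries and 20 bytes of data:
# its data begins right after its own index entry
assert physical_range(0, 20, 0, 0, True, 64) == (64, 20)
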
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = 'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

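    # The index entry tuples consulted here (``self.index[rev]``) are laid out
    # as built in ``_addrevision()`` further down: roughly (offset/flags,
    # deltalen, rawsize, delta base, link, p1 rev, p2 rev, node, sidedata
    # offset, sidedata size, compression mode, sidedata compression mode),
    # which is why ``[10]`` is read as the chunk compression mode and ``[11]``
    # as the sidedata compression mode.
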
    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = 'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

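    # Example (sketch): for a sparse revlog, a snapshot revision's depth counts
    # the snapshots stored earlier in its own delta chain, so assuming ``r`` is
    # known to be a snapshot:
    #
    #     if self.issnapshot(r):
    #         depth = self.snapshotdepth(r)  # snapshots before ``r`` in the chain
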
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.hassidedata:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flag for this revision
        # (usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        comp_segment = self._getsegment(sidedata_offset, sidedata_size)
        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = 'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

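    # Note (informal): ``storageutil.hashrevisionsha1`` essentially computes a
    # SHA-1 over the two parent nodes (in a canonical order) followed by the
    # revision text; ``checkhash()`` below recomputes this to verify integrity.
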
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if troffset <= self.start(r):
                        trindex = r
                new_dfh.flush()

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()
                # the temp file replaces the real index when we exit the
                # context manager

            tr.replace(self._indexfile, trindex * self.index.entry_size)
            nodemaputil.setup_persistent_nodemap(tr, self)
            self._chunkclear()

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh)
                new_dfh = None
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            r = len(self)
            dsize = 0
            if r:
                dsize = self.end(r - 1)
            dfh = None
            if not self._inline:
                try:
                    dfh = self._datafp(b"r+")
                    if self._docket is None:
                        dfh.seek(0, os.SEEK_END)
                    else:
                        dfh.seek(self._docket.data_end, os.SEEK_SET)
                except IOError as inst:
                    if inst.errno != errno.ENOENT:
                        raise
                    dfh = self._datafp(b"w+")
                transaction.add(self._datafile, dsize)
            try:
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                try:
                    self._writinghandles = (ifh, dfh)
                    try:
                        yield
                        if self._docket is not None:
                            self._write_docket(transaction)
                    finally:
                        self._writinghandles = None
                finally:
                    ifh.close()
            finally:
                if dfh is not None:
                    dfh.close()

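    # Example (sketch): write access is expected to be wrapped in the
    # ``_writing`` context manager, the way ``addrawrevision()`` below does it:
    #
    #     with self._writing(transaction):
    #         self._addrevision(
    #             node, rawtext, transaction, link, p1, p2, flags, cachedelta
    #         )
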
    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic

        We could also imagine using the same transaction logic for all revlogs
        since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

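    # Example (sketch, assuming ``rl`` names a revlog instance, ``tr`` an open
    # transaction and ``text`` the new revision's full text): a typical caller
    # only provides the parents and the linkrev and lets the revlog compute
    # the node:
    #
    #     rev = rl.addrevision(text, tr, linkrev, p1node, p2node)
    #     node = rl.node(rev)
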
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

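    # Example (sketch): ``compress()`` and ``decompress()`` agree on a one-byte
    # header: ``b'u'`` marks data stored uncompressed, ``b'\0'`` (or an empty
    # header) marks data that needs no marker, and anything else (e.g. ``b'x'``
    # for zlib) selects a decompressor. So, roughly, for any chunk ``data``:
    #
    #     header, stored = self.compress(data)
    #     assert bytes(self.decompress(header + stored)) == data
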
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh = self._writinghandles
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            h, d = deltainfo.data
            if not h and not d:
                # no data to store at all... declare them uncompressed
                compression_mode = COMP_MODE_PLAIN
            elif not h:
                t = d[0:1]
                if t == b'\0':
                    compression_mode = COMP_MODE_PLAIN
                elif t == self._docket.default_compression_header:
                    compression_mode = COMP_MODE_DEFAULT
            elif h == b'u':
                # we have a more efficient way to declare uncompressed
                h = b''
                compression_mode = COMP_MODE_PLAIN
                deltainfo = deltautil.drop_u_compression(deltainfo)

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
            compression_mode,
            sidedata_compression_mode,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this O(1), revlog v2 needs a docket
        file to store that information: since sidedata can be rewritten to the
        end of the data file within a transaction, you can have cases where, for
        example, rev `n` does not have sidedata while rev `n - 1` does, leading
        to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(self, transaction, entry, data, link, offset, sidedata):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = self._writinghandles[0].tell()
            self._docket.data_end = self._writinghandles[1].tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
2780 finally:
2780 finally:
2781 self._adding_group = False
2781 self._adding_group = False
2782 return not empty
2782 return not empty
2783
2783
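# Illustrative sketch only (not part of this changeset): addgroup() consumes
# an iterable of 8-tuples matching the unpacking above, plus a linkmapper
# callback that turns the changelog node carried with each delta into a local
# link revision.  A hypothetical caller, with `rl` a revlog, `cl` the
# changelog and `tr` an open transaction, could look like:
#
#     def linkmapper(linknode):
#         return cl.rev(linknode)
#
#     deltas = (
#         (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
#         for (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
#         in incoming_entries  # e.g. produced by a changegroup reader
#     )
#     added = rl.addgroup(deltas, linkmapper, tr)
#     # addgroup() returns False only when the incoming group was empty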
2784 def iscensored(self, rev):
2784 def iscensored(self, rev):
2785 """Check if a file revision is censored."""
2785 """Check if a file revision is censored."""
2786 if not self._censorable:
2786 if not self._censorable:
2787 return False
2787 return False
2788
2788
2789 return self.flags(rev) & REVIDX_ISCENSORED
2789 return self.flags(rev) & REVIDX_ISCENSORED
2790
2790
2791 def _peek_iscensored(self, baserev, delta):
2791 def _peek_iscensored(self, baserev, delta):
2792 """Quickly check if a delta produces a censored revision."""
2792 """Quickly check if a delta produces a censored revision."""
2793 if not self._censorable:
2793 if not self._censorable:
2794 return False
2794 return False
2795
2795
2796 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2796 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2797
2797
2798 def getstrippoint(self, minlink):
2798 def getstrippoint(self, minlink):
2799 """find the minimum rev that must be stripped to strip the linkrev
2799 """find the minimum rev that must be stripped to strip the linkrev
2800
2800
2801 Returns a tuple containing the minimum rev and a set of all revs that
2801 Returns a tuple containing the minimum rev and a set of all revs that
2802 have linkrevs that will be broken by this strip.
2802 have linkrevs that will be broken by this strip.
2803 """
2803 """
2804 return storageutil.resolvestripinfo(
2804 return storageutil.resolvestripinfo(
2805 minlink,
2805 minlink,
2806 len(self) - 1,
2806 len(self) - 1,
2807 self.headrevs(),
2807 self.headrevs(),
2808 self.linkrev,
2808 self.linkrev,
2809 self.parentrevs,
2809 self.parentrevs,
2810 )
2810 )
2811
2811
2812 def strip(self, minlink, transaction):
2812 def strip(self, minlink, transaction):
2813 """truncate the revlog on the first revision with a linkrev >= minlink
2813 """truncate the revlog on the first revision with a linkrev >= minlink
2814
2814
2815 This function is called when we're stripping revision minlink and
2815 This function is called when we're stripping revision minlink and
2816 its descendants from the repository.
2816 its descendants from the repository.
2817
2817
2818 We have to remove all revisions with linkrev >= minlink, because
2818 We have to remove all revisions with linkrev >= minlink, because
2819 the equivalent changelog revisions will be renumbered after the
2819 the equivalent changelog revisions will be renumbered after the
2820 strip.
2820 strip.
2821
2821
2822 So we truncate the revlog on the first of these revisions, and
2822 So we truncate the revlog on the first of these revisions, and
2823 trust that the caller has saved the revisions that shouldn't be
2823 trust that the caller has saved the revisions that shouldn't be
2824 removed and that it'll re-add them after this truncation.
2824 removed and that it'll re-add them after this truncation.
2825 """
2825 """
2826 if len(self) == 0:
2826 if len(self) == 0:
2827 return
2827 return
2828
2828
2829 rev, _ = self.getstrippoint(minlink)
2829 rev, _ = self.getstrippoint(minlink)
2830 if rev == len(self):
2830 if rev == len(self):
2831 return
2831 return
2832
2832
2833 # first truncate the files on disk
2833 # first truncate the files on disk
2834 data_end = self.start(rev)
2834 data_end = self.start(rev)
2835 if not self._inline:
2835 if not self._inline:
2836 transaction.add(self._datafile, data_end)
2836 transaction.add(self._datafile, data_end)
2837 end = rev * self.index.entry_size
2837 end = rev * self.index.entry_size
2838 else:
2838 else:
2839 end = data_end + (rev * self.index.entry_size)
2839 end = data_end + (rev * self.index.entry_size)
2840
2840
2841 transaction.add(self._indexfile, end)
2841 transaction.add(self._indexfile, end)
2842 if self._docket is not None:
2842 if self._docket is not None:
2843 # XXX we could leverage the docket while stripping. However it is
2843 # XXX we could leverage the docket while stripping. However it is
2844 # not powerful enough at the time of this comment
2844 # not powerful enough at the time of this comment
2845 self._docket.index_end = end
2845 self._docket.index_end = end
2846 self._docket.data_end = data_end
2846 self._docket.data_end = data_end
2847 self._docket.write(transaction, stripping=True)
2847 self._docket.write(transaction, stripping=True)
2848
2848
2849 # then reset internal state in memory to forget those revisions
2849 # then reset internal state in memory to forget those revisions
2850 self._revisioncache = None
2850 self._revisioncache = None
2851 self._chaininfocache = util.lrucachedict(500)
2851 self._chaininfocache = util.lrucachedict(500)
2852 self._chunkclear()
2852 self._chunkclear()
2853
2853
2854 del self.index[rev:-1]
2854 del self.index[rev:-1]
2855
2855
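# Illustrative sketch only (not Mercurial code): the expected calling pattern
# for the two methods above, with `rl` a revlog, `tr` an open transaction and
# `minlink` the first changelog revision being stripped:
#
#     striprev, brokenrevs = rl.getstrippoint(minlink)
#     # `brokenrevs` have linkrevs the strip will invalidate; callers are
#     # expected to save their data first, then truncate:
#     rl.strip(minlink, tr)
#     # ...and re-add the saved revisions afterwards.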
2856 def checksize(self):
2856 def checksize(self):
2857 """Check size of index and data files
2857 """Check size of index and data files
2858
2858
2859 return a (dd, di) tuple.
2859 return a (dd, di) tuple.
2860 - dd: extra bytes for the "data" file
2860 - dd: extra bytes for the "data" file
2861 - di: extra bytes for the "index" file
2861 - di: extra bytes for the "index" file
2862
2862
2863 A healthy revlog will return (0, 0).
2863 A healthy revlog will return (0, 0).
2864 """
2864 """
2865 expected = 0
2865 expected = 0
2866 if len(self):
2866 if len(self):
2867 expected = max(0, self.end(len(self) - 1))
2867 expected = max(0, self.end(len(self) - 1))
2868
2868
2869 try:
2869 try:
2870 with self._datafp() as f:
2870 with self._datafp() as f:
2871 f.seek(0, io.SEEK_END)
2871 f.seek(0, io.SEEK_END)
2872 actual = f.tell()
2872 actual = f.tell()
2873 dd = actual - expected
2873 dd = actual - expected
2874 except IOError as inst:
2874 except IOError as inst:
2875 if inst.errno != errno.ENOENT:
2875 if inst.errno != errno.ENOENT:
2876 raise
2876 raise
2877 dd = 0
2877 dd = 0
2878
2878
2879 try:
2879 try:
2880 f = self.opener(self._indexfile)
2880 f = self.opener(self._indexfile)
2881 f.seek(0, io.SEEK_END)
2881 f.seek(0, io.SEEK_END)
2882 actual = f.tell()
2882 actual = f.tell()
2883 f.close()
2883 f.close()
2884 s = self.index.entry_size
2884 s = self.index.entry_size
2885 i = max(0, actual // s)
2885 i = max(0, actual // s)
2886 di = actual - (i * s)
2886 di = actual - (i * s)
2887 if self._inline:
2887 if self._inline:
2888 databytes = 0
2888 databytes = 0
2889 for r in self:
2889 for r in self:
2890 databytes += max(0, self.length(r))
2890 databytes += max(0, self.length(r))
2891 dd = 0
2891 dd = 0
2892 di = actual - len(self) * s - databytes
2892 di = actual - len(self) * s - databytes
2893 except IOError as inst:
2893 except IOError as inst:
2894 if inst.errno != errno.ENOENT:
2894 if inst.errno != errno.ENOENT:
2895 raise
2895 raise
2896 di = 0
2896 di = 0
2897
2897
2898 return (dd, di)
2898 return (dd, di)
2899
2899
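# Illustrative arithmetic only (mirrors the inline branch above, not new
# Mercurial code): for an inline revlog the index file holds both the index
# entries and the revision data, so the extra bytes reported are
#
#     di = index_file_size - num_revs * entry_size - total_data_bytes
#
# and a healthy revlog yields dd == di == 0.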
2900 def files(self):
2900 def files(self):
2901 res = [self._indexfile]
2901 res = [self._indexfile]
2902 if not self._inline:
2902 if not self._inline:
2903 res.append(self._datafile)
2903 res.append(self._datafile)
2904 return res
2904 return res
2905
2905
2906 def emitrevisions(
2906 def emitrevisions(
2907 self,
2907 self,
2908 nodes,
2908 nodes,
2909 nodesorder=None,
2909 nodesorder=None,
2910 revisiondata=False,
2910 revisiondata=False,
2911 assumehaveparentrevisions=False,
2911 assumehaveparentrevisions=False,
2912 deltamode=repository.CG_DELTAMODE_STD,
2912 deltamode=repository.CG_DELTAMODE_STD,
2913 sidedata_helpers=None,
2913 sidedata_helpers=None,
2914 ):
2914 ):
2915 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2915 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2916 raise error.ProgrammingError(
2916 raise error.ProgrammingError(
2917 b'unhandled value for nodesorder: %s' % nodesorder
2917 b'unhandled value for nodesorder: %s' % nodesorder
2918 )
2918 )
2919
2919
2920 if nodesorder is None and not self._generaldelta:
2920 if nodesorder is None and not self._generaldelta:
2921 nodesorder = b'storage'
2921 nodesorder = b'storage'
2922
2922
2923 if (
2923 if (
2924 not self._storedeltachains
2924 not self._storedeltachains
2925 and deltamode != repository.CG_DELTAMODE_PREV
2925 and deltamode != repository.CG_DELTAMODE_PREV
2926 ):
2926 ):
2927 deltamode = repository.CG_DELTAMODE_FULL
2927 deltamode = repository.CG_DELTAMODE_FULL
2928
2928
2929 return storageutil.emitrevisions(
2929 return storageutil.emitrevisions(
2930 self,
2930 self,
2931 nodes,
2931 nodes,
2932 nodesorder,
2932 nodesorder,
2933 revlogrevisiondelta,
2933 revlogrevisiondelta,
2934 deltaparentfn=self.deltaparent,
2934 deltaparentfn=self.deltaparent,
2935 candeltafn=self.candelta,
2935 candeltafn=self.candelta,
2936 rawsizefn=self.rawsize,
2936 rawsizefn=self.rawsize,
2937 revdifffn=self.revdiff,
2937 revdifffn=self.revdiff,
2938 flagsfn=self.flags,
2938 flagsfn=self.flags,
2939 deltamode=deltamode,
2939 deltamode=deltamode,
2940 revisiondata=revisiondata,
2940 revisiondata=revisiondata,
2941 assumehaveparentrevisions=assumehaveparentrevisions,
2941 assumehaveparentrevisions=assumehaveparentrevisions,
2942 sidedata_helpers=sidedata_helpers,
2942 sidedata_helpers=sidedata_helpers,
2943 )
2943 )
2944
2944
2945 DELTAREUSEALWAYS = b'always'
2945 DELTAREUSEALWAYS = b'always'
2946 DELTAREUSESAMEREVS = b'samerevs'
2946 DELTAREUSESAMEREVS = b'samerevs'
2947 DELTAREUSENEVER = b'never'
2947 DELTAREUSENEVER = b'never'
2948
2948
2949 DELTAREUSEFULLADD = b'fulladd'
2949 DELTAREUSEFULLADD = b'fulladd'
2950
2950
2951 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2951 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2952
2952
2953 def clone(
2953 def clone(
2954 self,
2954 self,
2955 tr,
2955 tr,
2956 destrevlog,
2956 destrevlog,
2957 addrevisioncb=None,
2957 addrevisioncb=None,
2958 deltareuse=DELTAREUSESAMEREVS,
2958 deltareuse=DELTAREUSESAMEREVS,
2959 forcedeltabothparents=None,
2959 forcedeltabothparents=None,
2960 sidedata_helpers=None,
2960 sidedata_helpers=None,
2961 ):
2961 ):
2962 """Copy this revlog to another, possibly with format changes.
2962 """Copy this revlog to another, possibly with format changes.
2963
2963
2964 The destination revlog will contain the same revisions and nodes.
2964 The destination revlog will contain the same revisions and nodes.
2965 However, it may not be bit-for-bit identical due to e.g. delta encoding
2965 However, it may not be bit-for-bit identical due to e.g. delta encoding
2966 differences.
2966 differences.
2967
2967
2968 The ``deltareuse`` argument controls how deltas from the existing revlog
2968 The ``deltareuse`` argument controls how deltas from the existing revlog
2969 are preserved in the destination revlog. The argument can have the
2969 are preserved in the destination revlog. The argument can have the
2970 following values:
2970 following values:
2971
2971
2972 DELTAREUSEALWAYS
2972 DELTAREUSEALWAYS
2973 Deltas will always be reused (if possible), even if the destination
2973 Deltas will always be reused (if possible), even if the destination
2974 revlog would not select the same revisions for the delta. This is the
2974 revlog would not select the same revisions for the delta. This is the
2975 fastest mode of operation.
2975 fastest mode of operation.
2976 DELTAREUSESAMEREVS
2976 DELTAREUSESAMEREVS
2977 Deltas will be reused if the destination revlog would pick the same
2977 Deltas will be reused if the destination revlog would pick the same
2978 revisions for the delta. This mode strikes a balance between speed
2978 revisions for the delta. This mode strikes a balance between speed
2979 and optimization.
2979 and optimization.
2980 DELTAREUSENEVER
2980 DELTAREUSENEVER
2981 Deltas will never be reused. This is the slowest mode of execution.
2981 Deltas will never be reused. This is the slowest mode of execution.
2982 This mode can be used to recompute deltas (e.g. if the diff/delta
2982 This mode can be used to recompute deltas (e.g. if the diff/delta
2983 algorithm changes).
2983 algorithm changes).
2984 DELTAREUSEFULLADD
2984 DELTAREUSEFULLADD
2985 Revisions will be re-added as if they were new content. This is
2985 Revisions will be re-added as if they were new content. This is
2986 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2986 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2987 e.g. large file detection and handling.
2987 e.g. large file detection and handling.
2988
2988
2989 Delta computation can be slow, so the choice of delta reuse policy can
2989 Delta computation can be slow, so the choice of delta reuse policy can
2990 significantly affect run time.
2990 significantly affect run time.
2991
2991
2992 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2992 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2993 two extremes. Deltas will be reused if they are appropriate. But if the
2993 two extremes. Deltas will be reused if they are appropriate. But if the
2994 delta could choose a better revision, it will do so. This means if you
2994 delta could choose a better revision, it will do so. This means if you
2995 are converting a non-generaldelta revlog to a generaldelta revlog,
2995 are converting a non-generaldelta revlog to a generaldelta revlog,
2996 deltas will be recomputed if the delta's parent isn't a parent of the
2996 deltas will be recomputed if the delta's parent isn't a parent of the
2997 revision.
2997 revision.
2998
2998
2999 In addition to the delta policy, the ``forcedeltabothparents``
2999 In addition to the delta policy, the ``forcedeltabothparents``
3000 argument controls whether deltas are forcibly computed against both parents
3000 argument controls whether deltas are forcibly computed against both parents
3001 for merges. If unset, the destination revlog's current setting is used.
3001 for merges. If unset, the destination revlog's current setting is used.
3002
3002
3003 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3003 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3004 `sidedata_helpers`.
3004 `sidedata_helpers`.
3005 """
3005 """
3006 if deltareuse not in self.DELTAREUSEALL:
3006 if deltareuse not in self.DELTAREUSEALL:
3007 raise ValueError(
3007 raise ValueError(
3008 _(b'value for deltareuse invalid: %s') % deltareuse
3008 _(b'value for deltareuse invalid: %s') % deltareuse
3009 )
3009 )
3010
3010
3011 if len(destrevlog):
3011 if len(destrevlog):
3012 raise ValueError(_(b'destination revlog is not empty'))
3012 raise ValueError(_(b'destination revlog is not empty'))
3013
3013
3014 if getattr(self, 'filteredrevs', None):
3014 if getattr(self, 'filteredrevs', None):
3015 raise ValueError(_(b'source revlog has filtered revisions'))
3015 raise ValueError(_(b'source revlog has filtered revisions'))
3016 if getattr(destrevlog, 'filteredrevs', None):
3016 if getattr(destrevlog, 'filteredrevs', None):
3017 raise ValueError(_(b'destination revlog has filtered revisions'))
3017 raise ValueError(_(b'destination revlog has filtered revisions'))
3018
3018
3019 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3019 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3020 # if possible.
3020 # if possible.
3021 oldlazydelta = destrevlog._lazydelta
3021 oldlazydelta = destrevlog._lazydelta
3022 oldlazydeltabase = destrevlog._lazydeltabase
3022 oldlazydeltabase = destrevlog._lazydeltabase
3023 oldamd = destrevlog._deltabothparents
3023 oldamd = destrevlog._deltabothparents
3024
3024
3025 try:
3025 try:
3026 if deltareuse == self.DELTAREUSEALWAYS:
3026 if deltareuse == self.DELTAREUSEALWAYS:
3027 destrevlog._lazydeltabase = True
3027 destrevlog._lazydeltabase = True
3028 destrevlog._lazydelta = True
3028 destrevlog._lazydelta = True
3029 elif deltareuse == self.DELTAREUSESAMEREVS:
3029 elif deltareuse == self.DELTAREUSESAMEREVS:
3030 destrevlog._lazydeltabase = False
3030 destrevlog._lazydeltabase = False
3031 destrevlog._lazydelta = True
3031 destrevlog._lazydelta = True
3032 elif deltareuse == self.DELTAREUSENEVER:
3032 elif deltareuse == self.DELTAREUSENEVER:
3033 destrevlog._lazydeltabase = False
3033 destrevlog._lazydeltabase = False
3034 destrevlog._lazydelta = False
3034 destrevlog._lazydelta = False
3035
3035
3036 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3036 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3037
3037
3038 self._clone(
3038 self._clone(
3039 tr,
3039 tr,
3040 destrevlog,
3040 destrevlog,
3041 addrevisioncb,
3041 addrevisioncb,
3042 deltareuse,
3042 deltareuse,
3043 forcedeltabothparents,
3043 forcedeltabothparents,
3044 sidedata_helpers,
3044 sidedata_helpers,
3045 )
3045 )
3046
3046
3047 finally:
3047 finally:
3048 destrevlog._lazydelta = oldlazydelta
3048 destrevlog._lazydelta = oldlazydelta
3049 destrevlog._lazydeltabase = oldlazydeltabase
3049 destrevlog._lazydeltabase = oldlazydeltabase
3050 destrevlog._deltabothparents = oldamd
3050 destrevlog._deltabothparents = oldamd
3051
3051
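# Illustrative sketch only (not Mercurial code): a hypothetical caller
# re-encoding a revlog into an empty destination while recomputing every
# delta, with `src` and `dest` revlog instances and `tr` an open transaction:
#
#     src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)
#
# DELTAREUSEALWAYS would instead copy existing deltas whenever possible
# (fastest), while the default DELTAREUSESAMEREVS only reuses a delta when
# the destination would have picked the same base revision.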
3052 def _clone(
3052 def _clone(
3053 self,
3053 self,
3054 tr,
3054 tr,
3055 destrevlog,
3055 destrevlog,
3056 addrevisioncb,
3056 addrevisioncb,
3057 deltareuse,
3057 deltareuse,
3058 forcedeltabothparents,
3058 forcedeltabothparents,
3059 sidedata_helpers,
3059 sidedata_helpers,
3060 ):
3060 ):
3061 """perform the core duty of `revlog.clone` after parameter processing"""
3061 """perform the core duty of `revlog.clone` after parameter processing"""
3062 deltacomputer = deltautil.deltacomputer(destrevlog)
3062 deltacomputer = deltautil.deltacomputer(destrevlog)
3063 index = self.index
3063 index = self.index
3064 for rev in self:
3064 for rev in self:
3065 entry = index[rev]
3065 entry = index[rev]
3066
3066
3067 # Some classes override linkrev to take filtered revs into
3067 # Some classes override linkrev to take filtered revs into
3068 # account. Use raw entry from index.
3068 # account. Use raw entry from index.
3069 flags = entry[0] & 0xFFFF
3069 flags = entry[0] & 0xFFFF
3070 linkrev = entry[4]
3070 linkrev = entry[4]
3071 p1 = index[entry[5]][7]
3071 p1 = index[entry[5]][7]
3072 p2 = index[entry[6]][7]
3072 p2 = index[entry[6]][7]
3073 node = entry[7]
3073 node = entry[7]
3074
3074
3075 # (Possibly) reuse the delta from the revlog if allowed and
3075 # (Possibly) reuse the delta from the revlog if allowed and
3076 # the revlog chunk is a delta.
3076 # the revlog chunk is a delta.
3077 cachedelta = None
3077 cachedelta = None
3078 rawtext = None
3078 rawtext = None
3079 if deltareuse == self.DELTAREUSEFULLADD:
3079 if deltareuse == self.DELTAREUSEFULLADD:
3080 text, sidedata = self._revisiondata(rev)
3080 text, sidedata = self._revisiondata(rev)
3081
3081
3082 if sidedata_helpers is not None:
3082 if sidedata_helpers is not None:
3083 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3083 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3084 self, sidedata_helpers, sidedata, rev
3084 self, sidedata_helpers, sidedata, rev
3085 )
3085 )
3086 flags = flags | new_flags[0] & ~new_flags[1]
3086 flags = flags | new_flags[0] & ~new_flags[1]
3087
3087
3088 destrevlog.addrevision(
3088 destrevlog.addrevision(
3089 text,
3089 text,
3090 tr,
3090 tr,
3091 linkrev,
3091 linkrev,
3092 p1,
3092 p1,
3093 p2,
3093 p2,
3094 cachedelta=cachedelta,
3094 cachedelta=cachedelta,
3095 node=node,
3095 node=node,
3096 flags=flags,
3096 flags=flags,
3097 deltacomputer=deltacomputer,
3097 deltacomputer=deltacomputer,
3098 sidedata=sidedata,
3098 sidedata=sidedata,
3099 )
3099 )
3100 else:
3100 else:
3101 if destrevlog._lazydelta:
3101 if destrevlog._lazydelta:
3102 dp = self.deltaparent(rev)
3102 dp = self.deltaparent(rev)
3103 if dp != nullrev:
3103 if dp != nullrev:
3104 cachedelta = (dp, bytes(self._chunk(rev)))
3104 cachedelta = (dp, bytes(self._chunk(rev)))
3105
3105
3106 sidedata = None
3106 sidedata = None
3107 if not cachedelta:
3107 if not cachedelta:
3108 rawtext, sidedata = self._revisiondata(rev)
3108 rawtext, sidedata = self._revisiondata(rev)
3109 if sidedata is None:
3109 if sidedata is None:
3110 sidedata = self.sidedata(rev)
3110 sidedata = self.sidedata(rev)
3111
3111
3112 if sidedata_helpers is not None:
3112 if sidedata_helpers is not None:
3113 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3113 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3114 self, sidedata_helpers, sidedata, rev
3114 self, sidedata_helpers, sidedata, rev
3115 )
3115 )
3116 flags = flags | new_flags[0] & ~new_flags[1]
3116 flags = flags | new_flags[0] & ~new_flags[1]
3117
3117
3118 with destrevlog._writing(tr):
3118 with destrevlog._writing(tr):
3119 destrevlog._addrevision(
3119 destrevlog._addrevision(
3120 node,
3120 node,
3121 rawtext,
3121 rawtext,
3122 tr,
3122 tr,
3123 linkrev,
3123 linkrev,
3124 p1,
3124 p1,
3125 p2,
3125 p2,
3126 flags,
3126 flags,
3127 cachedelta,
3127 cachedelta,
3128 deltacomputer=deltacomputer,
3128 deltacomputer=deltacomputer,
3129 sidedata=sidedata,
3129 sidedata=sidedata,
3130 )
3130 )
3131
3131
3132 if addrevisioncb:
3132 if addrevisioncb:
3133 addrevisioncb(self, rev, node)
3133 addrevisioncb(self, rev, node)
3134
3134
3135 def censorrevision(self, tr, censornode, tombstone=b''):
3135 def censorrevision(self, tr, censornode, tombstone=b''):
3136 if self._format_version == REVLOGV0:
3136 if self._format_version == REVLOGV0:
3137 raise error.RevlogError(
3137 raise error.RevlogError(
3138 _(b'cannot censor with version %d revlogs')
3138 _(b'cannot censor with version %d revlogs')
3139 % self._format_version
3139 % self._format_version
3140 )
3140 )
3141
3141
3142 censorrev = self.rev(censornode)
3142 censorrev = self.rev(censornode)
3143 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
3143 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
3144
3144
3145 if len(tombstone) > self.rawsize(censorrev):
3145 if len(tombstone) > self.rawsize(censorrev):
3146 raise error.Abort(
3146 raise error.Abort(
3147 _(b'censor tombstone must be no longer than censored data')
3147 _(b'censor tombstone must be no longer than censored data')
3148 )
3148 )
3149
3149
3150 # Rewriting the revlog in place is hard. Our strategy for censoring is
3150 # Rewriting the revlog in place is hard. Our strategy for censoring is
3151 # to create a new revlog, copy all revisions to it, then replace the
3151 # to create a new revlog, copy all revisions to it, then replace the
3152 # revlogs on transaction close.
3152 # revlogs on transaction close.
3153 #
3153 #
3154 # This is a bit dangerous. We could easily have a mismatch of state.
3154 # This is a bit dangerous. We could easily have a mismatch of state.
3155 newrl = revlog(
3155 newrl = revlog(
3156 self.opener,
3156 self.opener,
3157 target=self.target,
3157 target=self.target,
3158 radix=self.radix,
3158 radix=self.radix,
3159 postfix=b'tmpcensored',
3159 postfix=b'tmpcensored',
3160 censorable=True,
3160 censorable=True,
3161 )
3161 )
3162 newrl._format_version = self._format_version
3162 newrl._format_version = self._format_version
3163 newrl._format_flags = self._format_flags
3163 newrl._format_flags = self._format_flags
3164 newrl._generaldelta = self._generaldelta
3164 newrl._generaldelta = self._generaldelta
3165 newrl._parse_index = self._parse_index
3165 newrl._parse_index = self._parse_index
3166
3166
3167 for rev in self.revs():
3167 for rev in self.revs():
3168 node = self.node(rev)
3168 node = self.node(rev)
3169 p1, p2 = self.parents(node)
3169 p1, p2 = self.parents(node)
3170
3170
3171 if rev == censorrev:
3171 if rev == censorrev:
3172 newrl.addrawrevision(
3172 newrl.addrawrevision(
3173 tombstone,
3173 tombstone,
3174 tr,
3174 tr,
3175 self.linkrev(censorrev),
3175 self.linkrev(censorrev),
3176 p1,
3176 p1,
3177 p2,
3177 p2,
3178 censornode,
3178 censornode,
3179 REVIDX_ISCENSORED,
3179 REVIDX_ISCENSORED,
3180 )
3180 )
3181
3181
3182 if newrl.deltaparent(rev) != nullrev:
3182 if newrl.deltaparent(rev) != nullrev:
3183 raise error.Abort(
3183 raise error.Abort(
3184 _(
3184 _(
3185 b'censored revision stored as delta; '
3185 b'censored revision stored as delta; '
3186 b'cannot censor'
3186 b'cannot censor'
3187 ),
3187 ),
3188 hint=_(
3188 hint=_(
3189 b'censoring of revlogs is not '
3189 b'censoring of revlogs is not '
3190 b'fully implemented; please report '
3190 b'fully implemented; please report '
3191 b'this bug'
3191 b'this bug'
3192 ),
3192 ),
3193 )
3193 )
3194 continue
3194 continue
3195
3195
3196 if self.iscensored(rev):
3196 if self.iscensored(rev):
3197 if self.deltaparent(rev) != nullrev:
3197 if self.deltaparent(rev) != nullrev:
3198 raise error.Abort(
3198 raise error.Abort(
3199 _(
3199 _(
3200 b'cannot censor due to censored '
3200 b'cannot censor due to censored '
3201 b'revision having delta stored'
3201 b'revision having delta stored'
3202 )
3202 )
3203 )
3203 )
3204 rawtext = self._chunk(rev)
3204 rawtext = self._chunk(rev)
3205 else:
3205 else:
3206 rawtext = self.rawdata(rev)
3206 rawtext = self.rawdata(rev)
3207
3207
3208 newrl.addrawrevision(
3208 newrl.addrawrevision(
3209 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3209 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3210 )
3210 )
3211
3211
3212 tr.addbackup(self._indexfile, location=b'store')
3212 tr.addbackup(self._indexfile, location=b'store')
3213 if not self._inline:
3213 if not self._inline:
3214 tr.addbackup(self._datafile, location=b'store')
3214 tr.addbackup(self._datafile, location=b'store')
3215
3215
3216 self.opener.rename(newrl._indexfile, self._indexfile)
3216 self.opener.rename(newrl._indexfile, self._indexfile)
3217 if not self._inline:
3217 if not self._inline:
3218 self.opener.rename(newrl._datafile, self._datafile)
3218 self.opener.rename(newrl._datafile, self._datafile)
3219
3219
3220 self.clearcaches()
3220 self.clearcaches()
3221 self._loadindex()
3221 self._loadindex()
3222
3222
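# Illustrative sketch only (not Mercurial code): censoring replaces one
# revision's raw data with a tombstone by rewriting the whole revlog, as
# described above.  A hypothetical call, with `fl` a censorable revlog,
# `tr` an open transaction and `badnode` the node to censor:
#
#     fl.censorrevision(tr, badnode, tombstone=b'removed by administrator')
#     # aborts if the tombstone is longer than the data it replaces, or if
#     # the rewritten revision would end up stored as a delta.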
3223 def verifyintegrity(self, state):
3223 def verifyintegrity(self, state):
3224 """Verifies the integrity of the revlog.
3224 """Verifies the integrity of the revlog.
3225
3225
3226 Yields ``revlogproblem`` instances describing problems that are
3226 Yields ``revlogproblem`` instances describing problems that are
3227 found.
3227 found.
3228 """
3228 """
3229 dd, di = self.checksize()
3229 dd, di = self.checksize()
3230 if dd:
3230 if dd:
3231 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3231 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3232 if di:
3232 if di:
3233 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3233 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3234
3234
3235 version = self._format_version
3235 version = self._format_version
3236
3236
3237 # The verifier tells us what version revlog we should be.
3237 # The verifier tells us what version revlog we should be.
3238 if version != state[b'expectedversion']:
3238 if version != state[b'expectedversion']:
3239 yield revlogproblem(
3239 yield revlogproblem(
3240 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3240 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3241 % (self.display_id, version, state[b'expectedversion'])
3241 % (self.display_id, version, state[b'expectedversion'])
3242 )
3242 )
3243
3243
3244 state[b'skipread'] = set()
3244 state[b'skipread'] = set()
3245 state[b'safe_renamed'] = set()
3245 state[b'safe_renamed'] = set()
3246
3246
3247 for rev in self:
3247 for rev in self:
3248 node = self.node(rev)
3248 node = self.node(rev)
3249
3249
3250 # Verify contents. 4 cases to care about:
3250 # Verify contents. 4 cases to care about:
3251 #
3251 #
3252 # common: the most common case
3252 # common: the most common case
3253 # rename: with a rename
3253 # rename: with a rename
3254 # meta: file content starts with b'\1\n', the metadata
3254 # meta: file content starts with b'\1\n', the metadata
3255 # header defined in filelog.py, but without a rename
3255 # header defined in filelog.py, but without a rename
3256 # ext: content stored externally
3256 # ext: content stored externally
3257 #
3257 #
3258 # More formally, their differences are shown below:
3258 # More formally, their differences are shown below:
3259 #
3259 #
3260 # | common | rename | meta | ext
3260 # | common | rename | meta | ext
3261 # -------------------------------------------------------
3261 # -------------------------------------------------------
3262 # flags() | 0 | 0 | 0 | not 0
3262 # flags() | 0 | 0 | 0 | not 0
3263 # renamed() | False | True | False | ?
3263 # renamed() | False | True | False | ?
3264 # rawtext[0:2]=='\1\n'| False | True | True | ?
3264 # rawtext[0:2]=='\1\n'| False | True | True | ?
3265 #
3265 #
3266 # "rawtext" means the raw text stored in revlog data, which
3266 # "rawtext" means the raw text stored in revlog data, which
3267 # could be retrieved by "rawdata(rev)". "text"
3267 # could be retrieved by "rawdata(rev)". "text"
3268 # mentioned below is "revision(rev)".
3268 # mentioned below is "revision(rev)".
3269 #
3269 #
3270 # There are 3 different lengths stored physically:
3270 # There are 3 different lengths stored physically:
3271 # 1. L1: rawsize, stored in revlog index
3271 # 1. L1: rawsize, stored in revlog index
3272 # 2. L2: len(rawtext), stored in revlog data
3272 # 2. L2: len(rawtext), stored in revlog data
3273 # 3. L3: len(text), stored in revlog data if flags==0, or
3273 # 3. L3: len(text), stored in revlog data if flags==0, or
3274 # possibly somewhere else if flags!=0
3274 # possibly somewhere else if flags!=0
3275 #
3275 #
3276 # L1 should be equal to L2. L3 could be different from them.
3276 # L1 should be equal to L2. L3 could be different from them.
3277 # "text" may or may not affect commit hash depending on flag
3277 # "text" may or may not affect commit hash depending on flag
3278 # processors (see flagutil.addflagprocessor).
3278 # processors (see flagutil.addflagprocessor).
3279 #
3279 #
3280 # | common | rename | meta | ext
3280 # | common | rename | meta | ext
3281 # -------------------------------------------------
3281 # -------------------------------------------------
3282 # rawsize() | L1 | L1 | L1 | L1
3282 # rawsize() | L1 | L1 | L1 | L1
3283 # size() | L1 | L2-LM | L1(*) | L1 (?)
3283 # size() | L1 | L2-LM | L1(*) | L1 (?)
3284 # len(rawtext) | L2 | L2 | L2 | L2
3284 # len(rawtext) | L2 | L2 | L2 | L2
3285 # len(text) | L2 | L2 | L2 | L3
3285 # len(text) | L2 | L2 | L2 | L3
3286 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3286 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3287 #
3287 #
3288 # LM: length of metadata, depending on rawtext
3288 # LM: length of metadata, depending on rawtext
3289 # (*): not ideal, see comment in filelog.size
3289 # (*): not ideal, see comment in filelog.size
3290 # (?): could be "- len(meta)" if the resolved content has
3290 # (?): could be "- len(meta)" if the resolved content has
3291 # rename metadata
3291 # rename metadata
3292 #
3292 #
3293 # Checks needed to be done:
3293 # Checks needed to be done:
3294 # 1. length check: L1 == L2, in all cases.
3294 # 1. length check: L1 == L2, in all cases.
3295 # 2. hash check: depending on flag processor, we may need to
3295 # 2. hash check: depending on flag processor, we may need to
3296 # use either "text" (external), or "rawtext" (in revlog).
3296 # use either "text" (external), or "rawtext" (in revlog).
3297
3297
3298 try:
3298 try:
3299 skipflags = state.get(b'skipflags', 0)
3299 skipflags = state.get(b'skipflags', 0)
3300 if skipflags:
3300 if skipflags:
3301 skipflags &= self.flags(rev)
3301 skipflags &= self.flags(rev)
3302
3302
3303 _verify_revision(self, skipflags, state, node)
3303 _verify_revision(self, skipflags, state, node)
3304
3304
3305 l1 = self.rawsize(rev)
3305 l1 = self.rawsize(rev)
3306 l2 = len(self.rawdata(node))
3306 l2 = len(self.rawdata(node))
3307
3307
3308 if l1 != l2:
3308 if l1 != l2:
3309 yield revlogproblem(
3309 yield revlogproblem(
3310 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3310 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3311 node=node,
3311 node=node,
3312 )
3312 )
3313
3313
3314 except error.CensoredNodeError:
3314 except error.CensoredNodeError:
3315 if state[b'erroroncensored']:
3315 if state[b'erroroncensored']:
3316 yield revlogproblem(
3316 yield revlogproblem(
3317 error=_(b'censored file data'), node=node
3317 error=_(b'censored file data'), node=node
3318 )
3318 )
3319 state[b'skipread'].add(node)
3319 state[b'skipread'].add(node)
3320 except Exception as e:
3320 except Exception as e:
3321 yield revlogproblem(
3321 yield revlogproblem(
3322 error=_(b'unpacking %s: %s')
3322 error=_(b'unpacking %s: %s')
3323 % (short(node), stringutil.forcebytestr(e)),
3323 % (short(node), stringutil.forcebytestr(e)),
3324 node=node,
3324 node=node,
3325 )
3325 )
3326 state[b'skipread'].add(node)
3326 state[b'skipread'].add(node)
3327
3327
3328 def storageinfo(
3328 def storageinfo(
3329 self,
3329 self,
3330 exclusivefiles=False,
3330 exclusivefiles=False,
3331 sharedfiles=False,
3331 sharedfiles=False,
3332 revisionscount=False,
3332 revisionscount=False,
3333 trackedsize=False,
3333 trackedsize=False,
3334 storedsize=False,
3334 storedsize=False,
3335 ):
3335 ):
3336 d = {}
3336 d = {}
3337
3337
3338 if exclusivefiles:
3338 if exclusivefiles:
3339 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3339 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3340 if not self._inline:
3340 if not self._inline:
3341 d[b'exclusivefiles'].append((self.opener, self._datafile))
3341 d[b'exclusivefiles'].append((self.opener, self._datafile))
3342
3342
3343 if sharedfiles:
3343 if sharedfiles:
3344 d[b'sharedfiles'] = []
3344 d[b'sharedfiles'] = []
3345
3345
3346 if revisionscount:
3346 if revisionscount:
3347 d[b'revisionscount'] = len(self)
3347 d[b'revisionscount'] = len(self)
3348
3348
3349 if trackedsize:
3349 if trackedsize:
3350 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3350 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3351
3351
3352 if storedsize:
3352 if storedsize:
3353 d[b'storedsize'] = sum(
3353 d[b'storedsize'] = sum(
3354 self.opener.stat(path).st_size for path in self.files()
3354 self.opener.stat(path).st_size for path in self.files()
3355 )
3355 )
3356
3356
3357 return d
3357 return d
3358
3358
3359 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3359 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3360 if not self.hassidedata:
3360 if not self.hassidedata:
3361 return
3361 return
3362 # revlog formats with sidedata support do not support inline
3362 # revlog formats with sidedata support do not support inline
3363 assert not self._inline
3363 assert not self._inline
3364 if not helpers[1] and not helpers[2]:
3364 if not helpers[1] and not helpers[2]:
3365 # Nothing to generate or remove
3365 # Nothing to generate or remove
3366 return
3366 return
3367
3367
3368 new_entries = []
3368 new_entries = []
3369 # append the new sidedata
3369 # append the new sidedata
3370 with self._writing(transaction):
3370 with self._writing(transaction):
3371 ifh, dfh = self._writinghandles
3371 ifh, dfh = self._writinghandles
3372 if self._docket is not None:
3372 if self._docket is not None:
3373 dfh.seek(self._docket.data_end, os.SEEK_SET)
3373 dfh.seek(self._docket.data_end, os.SEEK_SET)
3374 else:
3374 else:
3375 dfh.seek(0, os.SEEK_END)
3375 dfh.seek(0, os.SEEK_END)
3376
3376
3377 current_offset = dfh.tell()
3377 current_offset = dfh.tell()
3378 for rev in range(startrev, endrev + 1):
3378 for rev in range(startrev, endrev + 1):
3379 entry = self.index[rev]
3379 entry = self.index[rev]
3380 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3380 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3381 store=self,
3381 store=self,
3382 sidedata_helpers=helpers,
3382 sidedata_helpers=helpers,
3383 sidedata={},
3383 sidedata={},
3384 rev=rev,
3384 rev=rev,
3385 )
3385 )
3386
3386
3387 serialized_sidedata = sidedatautil.serialize_sidedata(
3387 serialized_sidedata = sidedatautil.serialize_sidedata(
3388 new_sidedata
3388 new_sidedata
3389 )
3389 )
3390
3390
3391 sidedata_compression_mode = COMP_MODE_INLINE
3391 sidedata_compression_mode = COMP_MODE_INLINE
3392 if serialized_sidedata and self.hassidedata:
3392 if serialized_sidedata and self.hassidedata:
3393 sidedata_compression_mode = COMP_MODE_PLAIN
3393 sidedata_compression_mode = COMP_MODE_PLAIN
3394 h, comp_sidedata = self.compress(serialized_sidedata)
3394 h, comp_sidedata = self.compress(serialized_sidedata)
3395 if (
3395 if (
3396 h != b'u'
3396 h != b'u'
3397 and comp_sidedata[0] != b'\0'
3397 and comp_sidedata[0] != b'\0'
3398 and len(comp_sidedata) < len(serialized_sidedata)
3398 and len(comp_sidedata) < len(serialized_sidedata)
3399 ):
3399 ):
3400 assert not h
3400 assert not h
3401 if (
3401 if (
3402 comp_sidedata[0]
3402 comp_sidedata[0]
3403 == self._docket.default_compression_header
3403 == self._docket.default_compression_header
3404 ):
3404 ):
3405 sidedata_compression_mode = COMP_MODE_DEFAULT
3405 sidedata_compression_mode = COMP_MODE_DEFAULT
3406 serialized_sidedata = comp_sidedata
3406 serialized_sidedata = comp_sidedata
3407 else:
3407 else:
3408 sidedata_compression_mode = COMP_MODE_INLINE
3408 sidedata_compression_mode = COMP_MODE_INLINE
3409 serialized_sidedata = comp_sidedata
3409 serialized_sidedata = comp_sidedata
3410 if entry[8] != 0 or entry[9] != 0:
3410 if entry[8] != 0 or entry[9] != 0:
3411 # rewriting entries that already have sidedata is not
3411 # rewriting entries that already have sidedata is not
3412 # supported yet, because it introduces garbage data in the
3412 # supported yet, because it introduces garbage data in the
3413 # revlog.
3413 # revlog.
3414 msg = b"rewriting existing sidedata is not supported yet"
3414 msg = b"rewriting existing sidedata is not supported yet"
3415 raise error.Abort(msg)
3415 raise error.Abort(msg)
3416
3416
3417 # Apply (potential) flags to add and to remove after running
3417 # Apply (potential) flags to add and to remove after running
3418 # the sidedata helpers
3418 # the sidedata helpers
3419 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3419 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3420 entry_update = (
3420 entry_update = (
3421 current_offset,
3421 current_offset,
3422 len(serialized_sidedata),
3422 len(serialized_sidedata),
3423 new_offset_flags,
3423 new_offset_flags,
3424 sidedata_compression_mode,
3424 sidedata_compression_mode,
3425 )
3425 )
3426
3426
3427 # the sidedata computation might have moved the file cursors around
3427 # the sidedata computation might have moved the file cursors around
3428 dfh.seek(current_offset, os.SEEK_SET)
3428 dfh.seek(current_offset, os.SEEK_SET)
3429 dfh.write(serialized_sidedata)
3429 dfh.write(serialized_sidedata)
3430 new_entries.append(entry_update)
3430 new_entries.append(entry_update)
3431 current_offset += len(serialized_sidedata)
3431 current_offset += len(serialized_sidedata)
3432 if self._docket is not None:
3432 if self._docket is not None:
3433 self._docket.data_end = dfh.tell()
3433 self._docket.data_end = dfh.tell()
3434
3434
3435 # rewrite the new index entries
3435 # rewrite the new index entries
3436 ifh.seek(startrev * self.index.entry_size)
3436 ifh.seek(startrev * self.index.entry_size)
3437 for i, e in enumerate(new_entries):
3437 for i, e in enumerate(new_entries):
3438 rev = startrev + i
3438 rev = startrev + i
3439 self.index.replace_sidedata_info(rev, *e)
3439 self.index.replace_sidedata_info(rev, *e)
3440 packed = self.index.entry_binary(rev)
3440 packed = self.index.entry_binary(rev)
3441 if rev == 0 and self._docket is None:
3441 if rev == 0 and self._docket is None:
3442 header = self._format_flags | self._format_version
3442 header = self._format_flags | self._format_version
3443 header = self.index.pack_header(header)
3443 header = self.index.pack_header(header)
3444 packed = header + packed
3444 packed = header + packed
3445 ifh.write(packed)
3445 ifh.write(packed)
@@ -1,516 +1,520 b''
1 # setdiscovery.py - improved discovery of common nodeset for mercurial
1 # setdiscovery.py - improved discovery of common nodeset for mercurial
2 #
2 #
3 # Copyright 2010 Benoit Boissinot <bboissin@gmail.com>
3 # Copyright 2010 Benoit Boissinot <bboissin@gmail.com>
4 # and Peter Arrenbrecht <peter@arrenbrecht.ch>
4 # and Peter Arrenbrecht <peter@arrenbrecht.ch>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """
8 """
9 The algorithm works in the following way. You have two repositories: local and
9 The algorithm works in the following way. You have two repositories: local and
10 remote. They both contain a DAG of changelists.
10 remote. They both contain a DAG of changelists.
11
11
12 The goal of the discovery protocol is to find one set of node *common*,
12 The goal of the discovery protocol is to find one set of node *common*,
13 the set of nodes shared by local and remote.
13 the set of nodes shared by local and remote.
14
14
15 One of the issues with the original protocol was latency: it could
15 One of the issues with the original protocol was latency: it could
16 potentially require lots of roundtrips to discover that the local repo was a
16 potentially require lots of roundtrips to discover that the local repo was a
17 subset of remote (which is a very common case, you usually have few changes
17 subset of remote (which is a very common case, you usually have few changes
18 compared to upstream, while upstream probably had lots of development).
18 compared to upstream, while upstream probably had lots of development).
19
19
20 The new protocol only requires one interface for the remote repo: `known()`,
20 The new protocol only requires one interface for the remote repo: `known()`,
21 which given a set of changelists tells you if they are present in the DAG.
21 which given a set of changelists tells you if they are present in the DAG.
22
22
23 The algorithm then works as follows:
23 The algorithm then works as follows:
24
24
25 - We will be using three sets, `common`, `missing`, `unknown`. Originally
25 - We will be using three sets, `common`, `missing`, `unknown`. Originally
26 all nodes are in `unknown`.
26 all nodes are in `unknown`.
27 - Take a sample from `unknown`, call `remote.known(sample)`
27 - Take a sample from `unknown`, call `remote.known(sample)`
28 - For each node that remote knows, move it and all its ancestors to `common`
28 - For each node that remote knows, move it and all its ancestors to `common`
29 - For each node that remote doesn't know, move it and all its descendants
29 - For each node that remote doesn't know, move it and all its descendants
30 to `missing`
30 to `missing`
31 - Iterate until `unknown` is empty
31 - Iterate until `unknown` is empty
32
32
33 There are a couple of optimizations. The first is that instead of starting with a
33 There are a couple of optimizations. The first is that instead of starting with a
34 random sample of missing, we start by sending all heads; in the case where the
34 random sample of missing, we start by sending all heads; in the case where the
35 local repo is a subset, you have computed the answer in one round trip.
35 local repo is a subset, you have computed the answer in one round trip.
36
36
37 Then you can do something similar to the bisecting strategy used when
37 Then you can do something similar to the bisecting strategy used when
38 finding faulty changesets. Instead of random samples, you can try picking
38 finding faulty changesets. Instead of random samples, you can try picking
39 nodes that will maximize the number of nodes that will be
39 nodes that will maximize the number of nodes that will be
40 classified with them (since all ancestors or descendants will be marked as well).
40 classified with them (since all ancestors or descendants will be marked as well).
41 """
41 """
42
42
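# A minimal, self-contained model of the loop described in the docstring
# above.  Illustrative only: it uses a toy DAG (a dict mapping each rev to
# the list of its parent revs) and a fake remote_known() callable instead of
# real Mercurial objects, and it is not meant to live in this module.
def _toy_discovery(parents, heads, remote_known, samplesize=2):
    """Partition a toy DAG into (common, missing) using only remote_known()."""
    import random

    # reverse the parent edges once so descendants can be walked too
    children = {r: [] for r in parents}
    for r, ps in parents.items():
        for p in ps:
            children[p].append(r)

    def closure(rev, edges):
        # all revs reachable from `rev` through `edges`, including `rev`
        seen, stack = set(), [rev]
        while stack:
            r = stack.pop()
            if r not in seen:
                seen.add(r)
                stack.extend(edges[r])
        return seen

    common, missing = set(), set()
    unknown = set(parents)
    sample = set(heads) & unknown  # first round: send all local heads
    while unknown:
        if not sample:
            k = min(samplesize, len(unknown))
            sample = set(random.sample(sorted(unknown), k))
        for rev in sample:
            if remote_known(rev):
                common |= closure(rev, parents)  # its ancestors are common
            else:
                missing |= closure(rev, children)  # its descendants are missing
        unknown -= common | missing
        sample = set()
    return common, missing


# Example: a linear history 0-1-2-3 where the remote only has revs 0 and 1:
#
#     parents = {0: [], 1: [0], 2: [1], 3: [2]}
#     common, missing = _toy_discovery(parents, {3}, lambda r: r <= 1)
#     # -> common == {0, 1} and missing == {2, 3}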
43 from __future__ import absolute_import
43 from __future__ import absolute_import
44
44
45 import collections
45 import collections
46 import random
46 import random
47
47
48 from .i18n import _
48 from .i18n import _
49 from .node import nullrev
49 from .node import nullrev
50 from . import (
50 from . import (
51 error,
51 error,
52 policy,
52 policy,
53 util,
53 util,
54 )
54 )
55
55
56
56
57 def _updatesample(revs, heads, sample, parentfn, quicksamplesize=0):
57 def _updatesample(revs, heads, sample, parentfn, quicksamplesize=0):
58 """update an existing sample to match the expected size
58 """update an existing sample to match the expected size
59
59
60 The sample is updated with revs exponentially distant from each head of the
60 The sample is updated with revs exponentially distant from each head of the
61 <revs> set. (H~1, H~2, H~4, H~8, etc).
61 <revs> set. (H~1, H~2, H~4, H~8, etc).
62
62
63 If a target size is specified, the sampling will stop once this size is
63 If a target size is specified, the sampling will stop once this size is
64 reached. Otherwise sampling will happen until roots of the <revs> set are
64 reached. Otherwise sampling will happen until roots of the <revs> set are
65 reached.
65 reached.
66
66
67 :revs: set of revs we want to discover (if None, assume the whole dag)
67 :revs: set of revs we want to discover (if None, assume the whole dag)
68 :heads: set of DAG head revs
68 :heads: set of DAG head revs
69 :sample: a sample to update
69 :sample: a sample to update
70 :parentfn: a callable to resolve parents for a revision
70 :parentfn: a callable to resolve parents for a revision
71 :quicksamplesize: optional target size of the sample"""
71 :quicksamplesize: optional target size of the sample"""
72 dist = {}
72 dist = {}
73 visit = collections.deque(heads)
73 visit = collections.deque(heads)
74 seen = set()
74 seen = set()
75 factor = 1
75 factor = 1
76 while visit:
76 while visit:
77 curr = visit.popleft()
77 curr = visit.popleft()
78 if curr in seen:
78 if curr in seen:
79 continue
79 continue
80 d = dist.setdefault(curr, 1)
80 d = dist.setdefault(curr, 1)
81 if d > factor:
81 if d > factor:
82 factor *= 2
82 factor *= 2
83 if d == factor:
83 if d == factor:
84 sample.add(curr)
84 sample.add(curr)
85 if quicksamplesize and (len(sample) >= quicksamplesize):
85 if quicksamplesize and (len(sample) >= quicksamplesize):
86 return
86 return
87 seen.add(curr)
87 seen.add(curr)
88
88
89 for p in parentfn(curr):
89 for p in parentfn(curr):
90 if p != nullrev and (not revs or p in revs):
90 if p != nullrev and (not revs or p in revs):
91 dist.setdefault(p, d + 1)
91 dist.setdefault(p, d + 1)
92 visit.append(p)
92 visit.append(p)
93
93
94
94
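# Illustrative example (not part of the module): for a linear chain of revs
# 0..10 with head 10, the distance counter grows 1, 2, 3, ... as we walk
# towards the root, and a rev is kept whenever that counter equals the current
# power-of-two factor, so the sample holds the revs at distances 1, 2, 4, 8:
#
#     parentfn = lambda r: [r - 1] if r > 0 else [nullrev]
#     sample = set()
#     _updatesample(None, {10}, sample, parentfn)
#     # sample == {10, 9, 7, 3}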
95 def _limitsample(sample, desiredlen, randomize=True):
95 def _limitsample(sample, desiredlen, randomize=True):
96 """return a random subset of sample of at most desiredlen item.
96 """return a random subset of sample of at most desiredlen item.
97
97
98 If randomize is False, though, a deterministic subset is returned.
98 If randomize is False, though, a deterministic subset is returned.
99 This is meant for integration tests.
99 This is meant for integration tests.
100 """
100 """
101 if len(sample) <= desiredlen:
101 if len(sample) <= desiredlen:
102 return sample
102 return sample
103 if randomize:
103 if randomize:
104 return set(random.sample(sample, desiredlen))
104 return set(random.sample(sample, desiredlen))
105 sample = list(sample)
105 sample = list(sample)
106 sample.sort()
106 sample.sort()
107 return set(sample[:desiredlen])
107 return set(sample[:desiredlen])
108
108
109
109
110 class partialdiscovery(object):
110 class partialdiscovery(object):
111 """an object representing ongoing discovery
111 """an object representing ongoing discovery
112
112
113 Fed with data from the remote repository, this object keeps track of the
113 Fed with data from the remote repository, this object keeps track of the
114 current set of changesets in various states:
114 current set of changesets in various states:
115
115
116 - common: revs also known remotely
116 - common: revs also known remotely
117 - undecided: revs we don't have information on yet
117 - undecided: revs we don't have information on yet
118 - missing: revs missing remotely
118 - missing: revs missing remotely
119 (all tracked revisions are known locally)
119 (all tracked revisions are known locally)
120 """
120 """
121
121
122 def __init__(self, repo, targetheads, respectsize, randomize=True):
122 def __init__(self, repo, targetheads, respectsize, randomize=True):
123 self._repo = repo
123 self._repo = repo
124 self._targetheads = targetheads
124 self._targetheads = targetheads
125 self._common = repo.changelog.incrementalmissingrevs()
125 self._common = repo.changelog.incrementalmissingrevs()
126 self._undecided = None
126 self._undecided = None
127 self.missing = set()
127 self.missing = set()
128 self._childrenmap = None
128 self._childrenmap = None
129 self._respectsize = respectsize
129 self._respectsize = respectsize
130 self.randomize = randomize
130 self.randomize = randomize
131
131
132 def addcommons(self, commons):
132 def addcommons(self, commons):
133 """register nodes known as common"""
133 """register nodes known as common"""
134 self._common.addbases(commons)
134 self._common.addbases(commons)
135 if self._undecided is not None:
135 if self._undecided is not None:
136 self._common.removeancestorsfrom(self._undecided)
136 self._common.removeancestorsfrom(self._undecided)
137
137
138 def addmissings(self, missings):
138 def addmissings(self, missings):
139 """register some nodes as missing"""
139 """register some nodes as missing"""
140 newmissing = self._repo.revs(b'%ld::%ld', missings, self.undecided)
140 newmissing = self._repo.revs(b'%ld::%ld', missings, self.undecided)
141 if newmissing:
141 if newmissing:
142 self.missing.update(newmissing)
142 self.missing.update(newmissing)
143 self.undecided.difference_update(newmissing)
143 self.undecided.difference_update(newmissing)
144
144
145 def addinfo(self, sample):
145 def addinfo(self, sample):
146 """consume an iterable of (rev, known) tuples"""
146 """consume an iterable of (rev, known) tuples"""
147 common = set()
147 common = set()
148 missing = set()
148 missing = set()
149 for rev, known in sample:
149 for rev, known in sample:
150 if known:
150 if known:
151 common.add(rev)
151 common.add(rev)
152 else:
152 else:
153 missing.add(rev)
153 missing.add(rev)
154 if common:
154 if common:
155 self.addcommons(common)
155 self.addcommons(common)
156 if missing:
156 if missing:
157 self.addmissings(missing)
157 self.addmissings(missing)
158
158
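# Illustrative sketch only (not Mercurial code): addinfo() is how answers from
# the remote's known() command are fed back in, closing the loop described in
# the module docstring.  A hypothetical round, with `disco` a partialdiscovery,
# `repo` the local repository, `headrevs` its head revisions and `remote` a
# peer object:
#
#     sample = list(disco.takequicksample(headrevs, size=100))
#     yesno = remote.known([repo.changelog.node(r) for r in sample])
#     disco.addinfo(zip(sample, yesno))
#     if disco.iscomplete():
#         commonheads = disco.commonheads()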
159 def hasinfo(self):
159 def hasinfo(self):
160 """return True is we have any clue about the remote state"""
160 """return True is we have any clue about the remote state"""
161 return self._common.hasbases()
161 return self._common.hasbases()
162
162
163 def iscomplete(self):
163 def iscomplete(self):
164 """True if all the necessary data have been gathered"""
164 """True if all the necessary data have been gathered"""
165 return self._undecided is not None and not self._undecided
165 return self._undecided is not None and not self._undecided
166
166
167 @property
167 @property
168 def undecided(self):
168 def undecided(self):
169 if self._undecided is not None:
169 if self._undecided is not None:
170 return self._undecided
170 return self._undecided
171 self._undecided = set(self._common.missingancestors(self._targetheads))
171 self._undecided = set(self._common.missingancestors(self._targetheads))
172 return self._undecided
172 return self._undecided
173
173
174 def stats(self):
174 def stats(self):
175 return {
175 return {
176 'undecided': len(self.undecided),
176 'undecided': len(self.undecided),
177 }
177 }
178
178
179 def commonheads(self):
179 def commonheads(self):
180 """the heads of the known common set"""
180 """the heads of the known common set"""
181 # heads(common) == heads(common.bases) since common represents
181 # heads(common) == heads(common.bases) since common represents
182 # common.bases and all its ancestors
182 # common.bases and all its ancestors
183 return self._common.basesheads()
183 return self._common.basesheads()
184
184
185 def _parentsgetter(self):
185 def _parentsgetter(self):
186 getrev = self._repo.changelog.index.__getitem__
186 getrev = self._repo.changelog.index.__getitem__
187
187
188 def getparents(r):
188 def getparents(r):
189 return getrev(r)[5:7]
189 return getrev(r)[5:7]
190
190
191 return getparents
191 return getparents
192
192
193 def _childrengetter(self):
193 def _childrengetter(self):
194
194
195 if self._childrenmap is not None:
195 if self._childrenmap is not None:
196 # During discovery, the `undecided` set keeps shrinking.
196 # During discovery, the `undecided` set keeps shrinking.
197 # Therefore, the map computed for an iteration N will be
197 # Therefore, the map computed for an iteration N will be
198 # valid for iteration N+1. Instead of computing the same
198 # valid for iteration N+1. Instead of computing the same
199 # data over and over, we cache it the first time.
199 # data over and over, we cache it the first time.
200 return self._childrenmap.__getitem__
200 return self._childrenmap.__getitem__
201
201
202 # _updatesample() essentially iterates over revisions to look
202 # _updatesample() essentially iterates over revisions to look
203 # up their children. This lookup is expensive and doing it in a loop is
203 # up their children. This lookup is expensive and doing it in a loop is
204 # quadratic. We precompute the children for all relevant revisions and
204 # quadratic. We precompute the children for all relevant revisions and
205 # make the lookup in _updatesample() a simple dict lookup.
205 # make the lookup in _updatesample() a simple dict lookup.
206 self._childrenmap = children = {}
206 self._childrenmap = children = {}
207
207
208 parentrevs = self._parentsgetter()
208 parentrevs = self._parentsgetter()
209 revs = self.undecided
209 revs = self.undecided
210
210
211 for rev in sorted(revs):
211 for rev in sorted(revs):
212 # Always ensure revision has an entry so we don't need to worry
212 # Always ensure revision has an entry so we don't need to worry
213 # about missing keys.
213 # about missing keys.
214 children[rev] = []
214 children[rev] = []
215 for prev in parentrevs(rev):
215 for prev in parentrevs(rev):
216 if prev == nullrev:
216 if prev == nullrev:
217 continue
217 continue
218 c = children.get(prev)
218 c = children.get(prev)
219 if c is not None:
219 if c is not None:
220 c.append(rev)
220 c.append(rev)
221 return children.__getitem__
221 return children.__getitem__
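As a standalone sketch of the precomputation described in the comment above, here is the same children-map construction over a tiny hand-made DAG; the revisions, parents, and the `undecided` set are invented for the example:

    nullrev = -1
    # toy DAG: rev -> (p1, p2)
    parents = {0: (nullrev, nullrev), 1: (0, nullrev), 2: (0, nullrev), 3: (1, 2)}
    undecided = {1, 2, 3}

    children = {}
    for rev in sorted(undecided):
        children[rev] = []              # every relevant rev gets an entry up front
        for prev in parents[rev]:
            if prev == nullrev:
                continue
            c = children.get(prev)      # parents outside `undecided` stay absent
            if c is not None:
                c.append(rev)
    # children == {1: [3], 2: [3], 3: []}, so lookups in _updatesample() are O(1)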
222
222
223 def takequicksample(self, headrevs, size):
223 def takequicksample(self, headrevs, size):
224 """takes a quick sample of size <size>
224 """takes a quick sample of size <size>
225
225
226 It is meant for initial sampling and focuses on querying heads and close
226 It is meant for initial sampling and focuses on querying heads and close
227 ancestors of heads.
227 ancestors of heads.
228
228
229 :headrevs: set of head revisions in local DAG to consider
229 :headrevs: set of head revisions in local DAG to consider
230 :size: the maximum size of the sample"""
230 :size: the maximum size of the sample"""
231 revs = self.undecided
231 revs = self.undecided
232 if len(revs) <= size:
232 if len(revs) <= size:
233 return list(revs)
233 return list(revs)
234 sample = set(self._repo.revs(b'heads(%ld)', revs))
234 sample = set(self._repo.revs(b'heads(%ld)', revs))
235
235
236 if len(sample) >= size:
236 if len(sample) >= size:
237 return _limitsample(sample, size, randomize=self.randomize)
237 return _limitsample(sample, size, randomize=self.randomize)
238
238
239 _updatesample(
239 _updatesample(
240 None, headrevs, sample, self._parentsgetter(), quicksamplesize=size
240 None, headrevs, sample, self._parentsgetter(), quicksamplesize=size
241 )
241 )
242 return sample
242 return sample
243
243
244 def takefullsample(self, headrevs, size):
244 def takefullsample(self, headrevs, size):
245 revs = self.undecided
245 revs = self.undecided
246 if len(revs) <= size:
246 if len(revs) <= size:
247 return list(revs)
247 return list(revs)
248 repo = self._repo
248 repo = self._repo
249 sample = set(repo.revs(b'heads(%ld)', revs))
249 sample = set(repo.revs(b'heads(%ld)', revs))
250 parentrevs = self._parentsgetter()
250 parentrevs = self._parentsgetter()
251
251
252 # update from heads
252 # update from heads
253 revsheads = sample.copy()
253 revsheads = sample.copy()
254 _updatesample(revs, revsheads, sample, parentrevs)
254 _updatesample(revs, revsheads, sample, parentrevs)
255
255
256 # update from roots
256 # update from roots
257 revsroots = set(repo.revs(b'roots(%ld)', revs))
257 revsroots = set(repo.revs(b'roots(%ld)', revs))
258 childrenrevs = self._childrengetter()
258 childrenrevs = self._childrengetter()
259 _updatesample(revs, revsroots, sample, childrenrevs)
259 _updatesample(revs, revsroots, sample, childrenrevs)
260 assert sample
260 assert sample
261
261
262 if not self._respectsize:
262 if not self._respectsize:
263 size = max(size, min(len(revsroots), len(revsheads)))
263 size = max(size, min(len(revsroots), len(revsheads)))
264
264
265 sample = _limitsample(sample, size, randomize=self.randomize)
265 sample = _limitsample(sample, size, randomize=self.randomize)
266 if len(sample) < size:
266 if len(sample) < size:
267 more = size - len(sample)
267 more = size - len(sample)
268 takefrom = list(revs - sample)
268 takefrom = list(revs - sample)
269 if self.randomize:
269 if self.randomize:
270 sample.update(random.sample(takefrom, more))
270 sample.update(random.sample(takefrom, more))
271 else:
271 else:
272 takefrom.sort()
272 takefrom.sort()
273 sample.update(takefrom[:more])
273 sample.update(takefrom[:more])
274 return sample
274 return sample
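The tail of takefullsample() tops the sample up to the requested size from the revisions not picked by the heads/roots walks, either randomly or deterministically. A condensed, self-contained sketch of just that top-up step, with invented inputs:

    import random

    revs = set(range(20))        # hypothetical undecided revisions
    sample = {0, 7, 19}          # revisions already picked from heads and roots
    size, randomize = 6, True

    if len(sample) < size:
        more = size - len(sample)
        takefrom = list(revs - sample)
        if randomize:
            sample.update(random.sample(takefrom, more))
        else:
            takefrom.sort()                  # deterministic order, useful for tests
            sample.update(takefrom[:more])
    # sample now contains exactly `size` revisions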
275
275
276
276
277 pure_partialdiscovery = partialdiscovery
278
277 partialdiscovery = policy.importrust(
279 partialdiscovery = policy.importrust(
278 'discovery', member='PartialDiscovery', default=partialdiscovery
280 'discovery', member='PartialDiscovery', default=partialdiscovery
279 )
281 )
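The two added lines above keep the pure-Python class reachable as pure_partialdiscovery before the partialdiscovery name is rebound to the optional Rust-backed implementation; findcommonheads() below then picks one or the other based on cl.index.rust_ext_compat. A generic, self-contained sketch of this fallback-and-select pattern (module and class names are hypothetical, and a plain try/except import stands in for policy.importrust):

    class PureEngine(object):
        """pure-Python implementation, always available"""

    try:
        # optional native implementation; silently fall back when unavailable
        from native_engine import Engine as NativeEngine  # hypothetical module
    except ImportError:
        NativeEngine = PureEngine

    def make_engine(native_compatible):
        # callers whose data the native code cannot handle pick the pure class,
        # mirroring the rust_ext_compat check in findcommonheads()
        cls = NativeEngine if native_compatible else PureEngine
        return cls()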
280
282
281
283
282 def findcommonheads(
284 def findcommonheads(
283 ui,
285 ui,
284 local,
286 local,
285 remote,
287 remote,
286 abortwhenunrelated=True,
288 abortwhenunrelated=True,
287 ancestorsof=None,
289 ancestorsof=None,
288 audit=None,
290 audit=None,
289 ):
291 ):
290 """Return a tuple (common, anyincoming, remoteheads) used to identify
292 """Return a tuple (common, anyincoming, remoteheads) used to identify
291 missing nodes from or in remote.
293 missing nodes from or in remote.
292
294
293 The audit argument is an optional dictionary that a caller can pass. It
295 The audit argument is an optional dictionary that a caller can pass. It
294 will be updated with extra data about the discovery, which is useful for
296 will be updated with extra data about the discovery, which is useful for
295 debugging.
297 debugging.
296 """
298 """
297
299
298 samplegrowth = float(ui.config(b'devel', b'discovery.grow-sample.rate'))
300 samplegrowth = float(ui.config(b'devel', b'discovery.grow-sample.rate'))
299
301
300 start = util.timer()
302 start = util.timer()
301
303
302 roundtrips = 0
304 roundtrips = 0
303 cl = local.changelog
305 cl = local.changelog
304 clnode = cl.node
306 clnode = cl.node
305 clrev = cl.rev
307 clrev = cl.rev
306
308
307 if ancestorsof is not None:
309 if ancestorsof is not None:
308 ownheads = [clrev(n) for n in ancestorsof]
310 ownheads = [clrev(n) for n in ancestorsof]
309 else:
311 else:
310 ownheads = [rev for rev in cl.headrevs() if rev != nullrev]
312 ownheads = [rev for rev in cl.headrevs() if rev != nullrev]
311
313
312 initial_head_exchange = ui.configbool(b'devel', b'discovery.exchange-heads')
314 initial_head_exchange = ui.configbool(b'devel', b'discovery.exchange-heads')
313 initialsamplesize = ui.configint(b'devel', b'discovery.sample-size.initial')
315 initialsamplesize = ui.configint(b'devel', b'discovery.sample-size.initial')
314 fullsamplesize = ui.configint(b'devel', b'discovery.sample-size')
316 fullsamplesize = ui.configint(b'devel', b'discovery.sample-size')
315 # We also ask remote about all the local heads. That set can be arbitrarily
317 # We also ask remote about all the local heads. That set can be arbitrarily
316 # large, so we used to limit its size to `initialsamplesize`. We no longer
318 # large, so we used to limit its size to `initialsamplesize`. We no longer
317 # do so, as it proved counterproductive. The skipped heads could lead to a
319 # do so, as it proved counterproductive. The skipped heads could lead to a
318 # large "undecided" set, slower to be clarified than if we asked the
320 # large "undecided" set, slower to be clarified than if we asked the
319 # question for all heads right away.
321 # question for all heads right away.
320 #
322 #
321 # We are already fetching all server heads using the `heads` command, so
323 # We are already fetching all server heads using the `heads` command, so
322 # sending an equivalent number of heads the other way should not have a
324 # sending an equivalent number of heads the other way should not have a
323 # significant impact. In addition, it is very likely that we are going to
325 # significant impact. In addition, it is very likely that we are going to
324 # have to issue "known" requests for an equivalent number of revisions in
326 # have to issue "known" requests for an equivalent number of revisions in
325 # order to decide whether these heads are common or missing.
327 # order to decide whether these heads are common or missing.
326 #
328 #
327 # A detailed analysis can be found below.
329 # A detailed analysis can be found below.
328 #
330 #
329 # Case A: local and server both have few heads
331 # Case A: local and server both have few heads
330 #
332 #
331 # Ownheads is below initialsamplesize; the limit would not have any effect.
333 # Ownheads is below initialsamplesize; the limit would not have any effect.
332 #
334 #
333 # Case B: local has few heads and server has many
335 # Case B: local has few heads and server has many
334 #
336 #
335 # Ownheads is below initialsamplesize; the limit would not have any effect.
337 # Ownheads is below initialsamplesize; the limit would not have any effect.
336 #
338 #
337 # Case C: local and server both have many heads
339 # Case C: local and server both have many heads
338 #
340 #
339 # We now transfer some more data, but not significantly more than is
341 # We now transfer some more data, but not significantly more than is
340 # already transferred to carry the server heads.
342 # already transferred to carry the server heads.
341 #
343 #
342 # Case D: local has many heads, server has few
344 # Case D: local has many heads, server has few
343 #
345 #
344 # D.1 local heads are mostly known remotely
346 # D.1 local heads are mostly known remotely
345 #
347 #
346 # All the known head will have be part of a `known` request at some
348 # All the known head will have be part of a `known` request at some
347 # point for the discovery to finish. Sending them all earlier is
349 # point for the discovery to finish. Sending them all earlier is
348 # actually helping.
350 # actually helping.
349 #
351 #
350 # (This case is fairly unlikely; it requires the numerous heads to all
352 # (This case is fairly unlikely; it requires the numerous heads to all
351 # be merged server side into only a few heads)
353 # be merged server side into only a few heads)
352 #
354 #
353 # D.2 local heads are mostly missing remotely
355 # D.2 local heads are mostly missing remotely
354 #
356 #
355 # To determine that the heads are missing, we'll have to issue `known`
357 # To determine that the heads are missing, we'll have to issue `known`
356 # requests for them or one of their ancestors. The number of `known`
358 # requests for them or one of their ancestors. The number of `known`
357 # requests will likely be in the same order of magnitude as the number
359 # requests will likely be in the same order of magnitude as the number
358 # of local heads.
360 # of local heads.
359 #
361 #
360 # The only case where we can be more efficient using `known` requests on
362 # The only case where we can be more efficient using `known` requests on
361 # ancestors is the case where all the "missing" local heads are based on a
363 # ancestors is the case where all the "missing" local heads are based on a
362 # few changesets, also "missing". This means we would have a "complex"
364 # few changesets, also "missing". This means we would have a "complex"
363 # graph (with many heads) attached to, but largely independent of, the
365 # graph (with many heads) attached to, but largely independent of, the
364 # "simple" graph on the server. This is a fairly unusual case and has
366 # "simple" graph on the server. This is a fairly unusual case and has
365 # not been met in the wild so far.
367 # not been met in the wild so far.
366 if initial_head_exchange:
368 if initial_head_exchange:
367 if remote.limitedarguments:
369 if remote.limitedarguments:
368 sample = _limitsample(ownheads, initialsamplesize)
370 sample = _limitsample(ownheads, initialsamplesize)
369 # indices between sample and externalized version must match
371 # indices between sample and externalized version must match
370 sample = list(sample)
372 sample = list(sample)
371 else:
373 else:
372 sample = ownheads
374 sample = ownheads
373
375
374 ui.debug(b"query 1; heads\n")
376 ui.debug(b"query 1; heads\n")
375 roundtrips += 1
377 roundtrips += 1
376 with remote.commandexecutor() as e:
378 with remote.commandexecutor() as e:
377 fheads = e.callcommand(b'heads', {})
379 fheads = e.callcommand(b'heads', {})
378 fknown = e.callcommand(
380 fknown = e.callcommand(
379 b'known',
381 b'known',
380 {
382 {
381 b'nodes': [clnode(r) for r in sample],
383 b'nodes': [clnode(r) for r in sample],
382 },
384 },
383 )
385 )
384
386
385 srvheadhashes, yesno = fheads.result(), fknown.result()
387 srvheadhashes, yesno = fheads.result(), fknown.result()
386
388
387 if audit is not None:
389 if audit is not None:
388 audit[b'total-roundtrips'] = 1
390 audit[b'total-roundtrips'] = 1
389
391
390 if cl.tiprev() == nullrev:
392 if cl.tiprev() == nullrev:
391 if srvheadhashes != [cl.nullid]:
393 if srvheadhashes != [cl.nullid]:
392 return [cl.nullid], True, srvheadhashes
394 return [cl.nullid], True, srvheadhashes
393 return [cl.nullid], False, []
395 return [cl.nullid], False, []
394 else:
396 else:
395 # we still need the remote head for the function return
397 # we still need the remote head for the function return
396 with remote.commandexecutor() as e:
398 with remote.commandexecutor() as e:
397 fheads = e.callcommand(b'heads', {})
399 fheads = e.callcommand(b'heads', {})
398 srvheadhashes = fheads.result()
400 srvheadhashes = fheads.result()
399
401
400 # start actual discovery (we note this before the next "if" for
402 # start actual discovery (we note this before the next "if" for
401 # compatibility reasons)
403 # compatibility reasons)
402 ui.status(_(b"searching for changes\n"))
404 ui.status(_(b"searching for changes\n"))
403
405
404 knownsrvheads = [] # revnos of remote heads that are known locally
406 knownsrvheads = [] # revnos of remote heads that are known locally
405 for node in srvheadhashes:
407 for node in srvheadhashes:
406 if node == cl.nullid:
408 if node == cl.nullid:
407 continue
409 continue
408
410
409 try:
411 try:
410 knownsrvheads.append(clrev(node))
412 knownsrvheads.append(clrev(node))
411 # Catches unknown and filtered nodes.
413 # Catches unknown and filtered nodes.
412 except error.LookupError:
414 except error.LookupError:
413 continue
415 continue
414
416
415 if initial_head_exchange:
417 if initial_head_exchange:
416 # early exit if we know all the specified remote heads already
418 # early exit if we know all the specified remote heads already
417 if len(knownsrvheads) == len(srvheadhashes):
419 if len(knownsrvheads) == len(srvheadhashes):
418 ui.debug(b"all remote heads known locally\n")
420 ui.debug(b"all remote heads known locally\n")
419 return srvheadhashes, False, srvheadhashes
421 return srvheadhashes, False, srvheadhashes
420
422
421 if len(sample) == len(ownheads) and all(yesno):
423 if len(sample) == len(ownheads) and all(yesno):
422 ui.note(_(b"all local changesets known remotely\n"))
424 ui.note(_(b"all local changesets known remotely\n"))
423 ownheadhashes = [clnode(r) for r in ownheads]
425 ownheadhashes = [clnode(r) for r in ownheads]
424 return ownheadhashes, True, srvheadhashes
426 return ownheadhashes, True, srvheadhashes
425
427
426 # full blown discovery
428 # full blown discovery
427
429
428 # if the server has a limit on its argument size, we can't grow the sample.
430 # if the server has a limit on its argument size, we can't grow the sample.
429 configbool = local.ui.configbool
431 configbool = local.ui.configbool
430 grow_sample = configbool(b'devel', b'discovery.grow-sample')
432 grow_sample = configbool(b'devel', b'discovery.grow-sample')
431 grow_sample = grow_sample and not remote.limitedarguments
433 grow_sample = grow_sample and not remote.limitedarguments
432
434
433 dynamic_sample = configbool(b'devel', b'discovery.grow-sample.dynamic')
435 dynamic_sample = configbool(b'devel', b'discovery.grow-sample.dynamic')
434 hard_limit_sample = not (dynamic_sample or remote.limitedarguments)
436 hard_limit_sample = not (dynamic_sample or remote.limitedarguments)
435
437
436 randomize = ui.configbool(b'devel', b'discovery.randomize')
438 randomize = ui.configbool(b'devel', b'discovery.randomize')
437 disco = partialdiscovery(
439 if cl.index.rust_ext_compat:
438 local, ownheads, hard_limit_sample, randomize=randomize
440 pd = partialdiscovery
439 )
441 else:
442 pd = pure_partialdiscovery
443 disco = pd(local, ownheads, hard_limit_sample, randomize=randomize)
440 if initial_head_exchange:
444 if initial_head_exchange:
441 # treat remote heads (and maybe own heads) as a first implicit sample
445 # treat remote heads (and maybe own heads) as a first implicit sample
442 # response
446 # response
443 disco.addcommons(knownsrvheads)
447 disco.addcommons(knownsrvheads)
444 disco.addinfo(zip(sample, yesno))
448 disco.addinfo(zip(sample, yesno))
445
449
446 full = not initial_head_exchange
450 full = not initial_head_exchange
447 progress = ui.makeprogress(_(b'searching'), unit=_(b'queries'))
451 progress = ui.makeprogress(_(b'searching'), unit=_(b'queries'))
448 while not disco.iscomplete():
452 while not disco.iscomplete():
449
453
450 if full or disco.hasinfo():
454 if full or disco.hasinfo():
451 if full:
455 if full:
452 ui.note(_(b"sampling from both directions\n"))
456 ui.note(_(b"sampling from both directions\n"))
453 else:
457 else:
454 ui.debug(b"taking initial sample\n")
458 ui.debug(b"taking initial sample\n")
455 samplefunc = disco.takefullsample
459 samplefunc = disco.takefullsample
456 targetsize = fullsamplesize
460 targetsize = fullsamplesize
457 if grow_sample:
461 if grow_sample:
458 fullsamplesize = int(fullsamplesize * samplegrowth)
462 fullsamplesize = int(fullsamplesize * samplegrowth)
459 else:
463 else:
460 # use even cheaper initial sample
464 # use even cheaper initial sample
461 ui.debug(b"taking quick initial sample\n")
465 ui.debug(b"taking quick initial sample\n")
462 samplefunc = disco.takequicksample
466 samplefunc = disco.takequicksample
463 targetsize = initialsamplesize
467 targetsize = initialsamplesize
464 sample = samplefunc(ownheads, targetsize)
468 sample = samplefunc(ownheads, targetsize)
465
469
466 roundtrips += 1
470 roundtrips += 1
467 progress.update(roundtrips)
471 progress.update(roundtrips)
468 stats = disco.stats()
472 stats = disco.stats()
469 ui.debug(
473 ui.debug(
470 b"query %i; still undecided: %i, sample size is: %i\n"
474 b"query %i; still undecided: %i, sample size is: %i\n"
471 % (roundtrips, stats['undecided'], len(sample))
475 % (roundtrips, stats['undecided'], len(sample))
472 )
476 )
473
477
474 # indices between sample and externalized version must match
478 # indices between sample and externalized version must match
475 sample = list(sample)
479 sample = list(sample)
476
480
477 with remote.commandexecutor() as e:
481 with remote.commandexecutor() as e:
478 yesno = e.callcommand(
482 yesno = e.callcommand(
479 b'known',
483 b'known',
480 {
484 {
481 b'nodes': [clnode(r) for r in sample],
485 b'nodes': [clnode(r) for r in sample],
482 },
486 },
483 ).result()
487 ).result()
484
488
485 full = True
489 full = True
486
490
487 disco.addinfo(zip(sample, yesno))
491 disco.addinfo(zip(sample, yesno))
488
492
489 result = disco.commonheads()
493 result = disco.commonheads()
490 elapsed = util.timer() - start
494 elapsed = util.timer() - start
491 progress.complete()
495 progress.complete()
492 ui.debug(b"%d total queries in %.4fs\n" % (roundtrips, elapsed))
496 ui.debug(b"%d total queries in %.4fs\n" % (roundtrips, elapsed))
493 msg = (
497 msg = (
494 b'found %d common and %d unknown server heads,'
498 b'found %d common and %d unknown server heads,'
495 b' %d roundtrips in %.4fs\n'
499 b' %d roundtrips in %.4fs\n'
496 )
500 )
497 missing = set(result) - set(knownsrvheads)
501 missing = set(result) - set(knownsrvheads)
498 ui.log(b'discovery', msg, len(result), len(missing), roundtrips, elapsed)
502 ui.log(b'discovery', msg, len(result), len(missing), roundtrips, elapsed)
499
503
500 if audit is not None:
504 if audit is not None:
501 audit[b'total-roundtrips'] = roundtrips
505 audit[b'total-roundtrips'] = roundtrips
502
506
503 if not result and srvheadhashes != [cl.nullid]:
507 if not result and srvheadhashes != [cl.nullid]:
504 if abortwhenunrelated:
508 if abortwhenunrelated:
505 raise error.Abort(_(b"repository is unrelated"))
509 raise error.Abort(_(b"repository is unrelated"))
506 else:
510 else:
507 ui.warn(_(b"warning: repository is unrelated\n"))
511 ui.warn(_(b"warning: repository is unrelated\n"))
508 return (
512 return (
509 {cl.nullid},
513 {cl.nullid},
510 True,
514 True,
511 srvheadhashes,
515 srvheadhashes,
512 )
516 )
513
517
514 anyincoming = srvheadhashes != [cl.nullid]
518 anyincoming = srvheadhashes != [cl.nullid]
515 result = {clnode(r) for r in result}
519 result = {clnode(r) for r in result}
516 return result, anyincoming, srvheadhashes
520 return result, anyincoming, srvheadhashes
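A hedged usage sketch of findcommonheads() (defined in mercurial/setdiscovery.py) and its optional audit dictionary, assuming `repo` is an existing local repository and `peer` an already-connected remote peer (neither is constructed here):

    from mercurial import setdiscovery

    audit = {}
    common, anyincoming, remoteheads = setdiscovery.findcommonheads(
        repo.ui, repo, peer, abortwhenunrelated=False, audit=audit
    )
    # the audit dict now carries at least the number of wire roundtrips
    repo.ui.debug(b'discovery took %d roundtrips\n' % audit[b'total-roundtrips'])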