revlog: avoid raising no-arg RevlogError for internal flow control...
Martin von Zweigbergk
r48074:93a0abe0 default
@@ -1,3461 +1,3466 @@ b''
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    revlogv0,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)
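

# A small illustration of the packing convention (the shift/mask rules are
# documented on the index entries in the `revlog` class docstring below):
#
#   field = offset_type(offset, flags)
#   offset = field >> 16    # byte position of the revision data chunk
#   flags = field & 0xFFFF  # REVIDX_* flags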


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    assert not inline
    from .pure.parsers import parse_index_cl_v2

    index, cache = parse_index_cl_v2(data)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).


    Internal details
    ----------------

    A large part of the revlog logic deals with revisions' "index entries",
    tuple objects that contain the same "items" whatever the revlog version.
    Different versions will have different ways of storing these items
    (sometimes not having them at all), but the tuple will always be the same.
    New fields are usually added at the end to avoid breaking existing code
    that relies on the existing order. The fields are defined as follows:

    [0] offset:
        The byte index of the start of revision data chunk.
        That value is shifted up by 16 bits. use "offset = field >> 16" to
        retrieve it.

        flags:
        A flag field that carries special information or changes the behavior
        of the revision. (see `REVIDX_*` constants for details)
        The flag field only occupies the first 16 bits of this field,
        use "flags = field & 0xFFFF" to retrieve the value.

    [1] compressed length:
        The size, in bytes, of the chunk on disk

    [2] uncompressed length:
        The size, in bytes, of the full revision once reconstructed.

    [3] base rev:
        Either the base of the revision delta chain (without general
        delta), or the base of the delta (stored in the data chunk)
        with general delta.

    [4] link rev:
        Changelog revision number of the changeset introducing this
        revision.

    [5] parent 1 rev:
        Revision number of the first parent

    [6] parent 2 rev:
        Revision number of the second parent

    [7] node id:
        The node id of the current revision

    [8] sidedata offset:
        The byte index of the start of the revision's side-data chunk.

    [9] sidedata chunk length:
        The size, in bytes, of the revision's side-data chunk.

    [10] data compression mode:
        two bits that detail the way the data chunk is compressed on disk.
        (see "COMP_MODE_*" constants for details). For revlog version 0 and
        1 this will always be COMP_MODE_INLINE.

    [11] side-data compression mode:
        two bits that detail the way the sidedata chunk is compressed on disk.
        (see "COMP_MODE_*" constants for details)
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must be reliably set by normal
        code, but that test, debug, or performance-measurement code might
        not set this to an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    def _init_opts(self):
        """process options (from above/config) to set up the associated
        default revlog mode

        These values might be affected when actually reading on-disk
        information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size for starting to use mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
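        # n & (n - 1) clears the lowest set bit, so for n > 0 the result is
        # zero exactly when n is a power of two (e.g. 65536 & 65535 == 0,
        # while 65537 & 65536 != 0)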
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''

    def _loadindex(self):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        entry_data = b''
        self._initempty = True
        entry_data = self._get_data(entry_point, mmapindexthreshold)
        if len(entry_data) > 0:
            header = INDEX_HEADER.unpack(entry_data[:4])[0]
            self._initempty = False
        else:
            header = new_header

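        # the 4-byte header packs the revlog format version in the low 16
        # bits and the format flags (e.g. FLAG_INLINE_DATA, FLAG_GENERALDELTA)
        # in the high 16 bits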
        self._format_flags = header & ~0xFFFF
        self._format_version = header & 0xFFFF

        supported_flags = SUPPORTED_FLAGS.get(self._format_version)
        if supported_flags is None:
            msg = _(b'unknown version (%d) in revlog %s')
            msg %= (self._format_version, self.display_id)
            raise error.RevlogError(msg)
        elif self._format_flags & ~supported_flags:
            msg = _(b'unknown flags (%#04x) in version %d revlog %s')
            display_flag = self._format_flags >> 16
            msg %= (display_flag, self._format_version, self.display_id)
            raise error.RevlogError(msg)

        features = FEATURES_BY_VERSION[self._format_version]
        self._inline = features[b'inline'](self._format_flags)
        self._generaldelta = features[b'generaldelta'](self._format_flags)
        self.hassidedata = features[b'sidedata']

        if not features[b'docket']:
            self._indexfile = entry_point
            index_data = entry_data
        else:
            self._docket_file = entry_point
            if self._initempty:
                self._docket = docketutil.default_docket(self, header)
            else:
                self._docket = docketutil.parse_docket(
                    self, entry_data, use_pending=self._trypending
                )
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        # Maybe we should build a user facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly and use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processors can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket, we
        # end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
926 flags = self.flags(rev)
926 flags = self.flags(rev)
927 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
927 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
928 return self.rawsize(rev)
928 return self.rawsize(rev)
929
929
930 return len(self.revision(rev, raw=False))
930 return len(self.revision(rev, raw=False))
931
931
932 def chainbase(self, rev):
932 def chainbase(self, rev):
933 base = self._chainbasecache.get(rev)
933 base = self._chainbasecache.get(rev)
934 if base is not None:
934 if base is not None:
935 return base
935 return base
936
936
937 index = self.index
937 index = self.index
938 iterrev = rev
938 iterrev = rev
939 base = index[iterrev][3]
939 base = index[iterrev][3]
940 while base != iterrev:
940 while base != iterrev:
941 iterrev = base
941 iterrev = base
942 base = index[iterrev][3]
942 base = index[iterrev][3]
943
943
944 self._chainbasecache[rev] = base
944 self._chainbasecache[rev] = base
945 return base
945 return base
946
946
947 def linkrev(self, rev):
947 def linkrev(self, rev):
948 return self.index[rev][4]
948 return self.index[rev][4]
949
949
950 def parentrevs(self, rev):
950 def parentrevs(self, rev):
951 try:
951 try:
952 entry = self.index[rev]
952 entry = self.index[rev]
953 except IndexError:
953 except IndexError:
954 if rev == wdirrev:
954 if rev == wdirrev:
955 raise error.WdirUnsupported
955 raise error.WdirUnsupported
956 raise
956 raise
957 if entry[5] == nullrev:
957 if entry[5] == nullrev:
958 return entry[6], entry[5]
958 return entry[6], entry[5]
959 else:
959 else:
960 return entry[5], entry[6]
960 return entry[5], entry[6]
961
961
962 # fast parentrevs(rev) where rev isn't filtered
962 # fast parentrevs(rev) where rev isn't filtered
963 _uncheckedparentrevs = parentrevs
963 _uncheckedparentrevs = parentrevs
964
964
965 def node(self, rev):
965 def node(self, rev):
966 try:
966 try:
967 return self.index[rev][7]
967 return self.index[rev][7]
968 except IndexError:
968 except IndexError:
969 if rev == wdirrev:
969 if rev == wdirrev:
970 raise error.WdirUnsupported
970 raise error.WdirUnsupported
971 raise
971 raise
972
972
    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if d[5] == nullrev:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
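
    # Toy walk equivalent to the pure-Python loop above (data hypothetical,
    # not a real index). With generaldelta, e[3] names the delta base
    # directly; without it, the base is implicitly rev - 1:
    #
    #     base = {0: 0, 1: 0, 2: 1}   # rev -> delta base; rev 0 is its own
    #     chain, iterrev = [], 2
    #     while iterrev != base[iterrev]:
    #         chain.append(iterrev)
    #         iterrev = base[iterrev]
    #     chain.append(iterrev)
    #     chain.reverse()             # [0, 1, 2], stopped == False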

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)
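
    # Sketch of the incremental usage this enables (rev numbers
    # hypothetical): the returned object keeps its base set across queries,
    # so repeated calls can reuse earlier traversal work instead of
    # recomputing ancestry from scratch:
    #
    #     inc = rl.incrementalmissingrevs(common=[10])
    #     inc.missingancestors([15])   # ancestors of 15 that 10 lacks
    #     inc.missingancestors([20])   # builds on state from the first call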

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]
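
    # Toy run of the marking pass above on a hypothetical 3-rev linear
    # history (0 <- 1 <- 2): every rev starts as a candidate head, then each
    # rev clears its parents, leaving only rev 2 set. The extra trailing
    # slot absorbs writes for nullrev (-1), just like in the real code:
    #
    #     parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1)}
    #     ishead = [1, 1, 1, 0]
    #     for r in range(3):
    #         for p in parents[r]:
    #             ishead[p] = 0
    #     heads = [r for r, v in enumerate(ishead) if v]   # [2]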

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))
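
    # The 'a > b' early return above leans on a revlog invariant worth
    # spelling out: a parent always has a smaller revision number than its
    # children, so an ancestor's rev can never exceed a descendant's. E.g.
    # (revs hypothetical) isancestorrev(7, 3) is False with no DAG walk at
    # all, while isancestorrev(3, 7) falls through to reachableroots().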

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass
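
    # Sketch of the id forms _match() resolves, in the order tried above
    # (values hypothetical):
    #
    #     rl._match(5)               # int -> node for rev 5
    #     rl._match(b'\x1f\x2e...')  # nodelen bytes -> binary node lookup
    #     rl._match(b'5')            # b"%d" round-trip -> node for rev 5
    #     rl._match(b'-1')           # negative revs count from the end
    #     rl._match(b'1f2e...')      # full hex nodeid -> bin() then lookup
    #     # anything else falls through and the method returns None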

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

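    # Sketch of the contract implemented above (prefixes hypothetical):
    #
    #     rl._partialmatch(b'1f2e')   # unique prefix -> node
    #     rl._partialmatch(b'zzzz')   # no match -> None
    #     rl._partialmatch(b'1f')     # several matches
    #                                 #   -> AmbiguousPrefixLookupError
    #     rl._partialmatch(b'ffff')   # only the virtual wdir node matches
    #                                 #   -> WdirUnsupported
    #
    # Per the changeset description, the 'ambiguous' flag replaces the
    # no-argument RevlogError that was previously raised purely for
    # internal flow control.
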
    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

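    # Usage sketch (values hypothetical): shortest() returns the smallest
    # prefix that _partialmatch() resolves without ambiguity, so the two
    # round-trip:
    #
    #     prefix = rl.shortest(node)          # e.g. b'1f2e3'
    #     assert rl._partialmatch(prefix) == node
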
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

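    # Numeric sketch of the append-vs-replace policy above (offsets
    # hypothetical, _chunksize as defined at module level): a read that
    # starts exactly where the cached data ends extends the cache; anything
    # else replaces it:
    #
    #     cache = (0, b'x' * 100)       # covers [0, 100)
    #     # offset 100 -> contiguous, cache grows to (0, 100 + len(data))
    #     # offset 500 -> gap, cache is replaced by (500, data)
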
    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self._indexfile if self._inline else self._datafile,
                        length,
                        offset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self._indexfile if self._inline else self._datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

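    # Numeric sketch of the window rounding above, assuming a (hypothetical)
    # cache size of 65536: both edges of the read are aligned to the window,
    # so nearby reads in either direction can be served from the cache:
    #
    #     offset, length, cachesize = 70000, 100, 65536
    #     realoffset = offset & ~(cachesize - 1)                    # 65536
    #     reallength = (
    #         (offset + length + cachesize) & ~(cachesize - 1)
    #     ) - realoffset                                            # 65536
    #     # the segment [70000, 70100) is served from [65536, 131072)
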
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._getsegment(start, length, df=df)

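    # Numeric sketch of the inline adjustment above (values hypothetical):
    # in an inline revlog each revision's data is preceded by its index
    # entry, so data offsets shift by entry_size per revision:
    #
    #     entry_size, startrev, endrev = 64, 2, 3
    #     start, end = 1000, 1500                # offsets in the data stream
    #     start += (startrev + 1) * entry_size   # 1000 + 192 = 1192
    #     end += (endrev + 1) * entry_size       # 1500 + 256 = 1756
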
1794 def _chunk(self, rev, df=None):
1799 def _chunk(self, rev, df=None):
1795 """Obtain a single decompressed chunk for a revision.
1800 """Obtain a single decompressed chunk for a revision.
1796
1801
1797 Accepts an integer revision and an optional already-open file handle
1802 Accepts an integer revision and an optional already-open file handle
1798 to be used for reading. If used, the seek position of the file will not
1803 to be used for reading. If used, the seek position of the file will not
1799 be preserved.
1804 be preserved.
1800
1805
1801 Returns a str holding uncompressed data for the requested revision.
1806 Returns a str holding uncompressed data for the requested revision.
1802 """
1807 """
1803 compression_mode = self.index[rev][10]
1808 compression_mode = self.index[rev][10]
1804 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1809 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1805 if compression_mode == COMP_MODE_PLAIN:
1810 if compression_mode == COMP_MODE_PLAIN:
1806 return data
1811 return data
1807 elif compression_mode == COMP_MODE_DEFAULT:
1812 elif compression_mode == COMP_MODE_DEFAULT:
1808 return self._decompressor(data)
1813 return self._decompressor(data)
1809 elif compression_mode == COMP_MODE_INLINE:
1814 elif compression_mode == COMP_MODE_INLINE:
1810 return self.decompress(data)
1815 return self.decompress(data)
1811 else:
1816 else:
1812 msg = 'unknown compression mode %d'
1817 msg = 'unknown compression mode %d'
1813 msg %= compression_mode
1818 msg %= compression_mode
1814 raise error.RevlogError(msg)
1819 raise error.RevlogError(msg)
1815
1820
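# A self-contained sketch of the three-way compression-mode dispatch used
# by _chunk() above. COMP_MODE_PLAIN data is stored verbatim,
# COMP_MODE_DEFAULT uses the revlog's configured decompressor, and
# COMP_MODE_INLINE carries a per-chunk header byte. The constant values
# and the zlib fallback here are illustrative assumptions, not the
# authoritative definitions from revlogutils.constants.
import zlib

COMP_MODE_PLAIN, COMP_MODE_DEFAULT, COMP_MODE_INLINE = 0, 1, 2

def decode_inline(data):
    # per-chunk header dispatch; shown in full after decompress() below
    if not data or data[:1] == b'\0':
        return data
    if data[:1] == b'u':
        return data[1:]
    return zlib.decompress(data)  # b'x' is zlib's first header byte

def decode_chunk(data, mode, default_decompress=zlib.decompress):
    if mode == COMP_MODE_PLAIN:
        return data                      # stored as-is
    elif mode == COMP_MODE_DEFAULT:
        return default_decompress(data)  # revlog-wide default engine
    elif mode == COMP_MODE_INLINE:
        return decode_inline(data)       # header byte selects the engine
    raise ValueError('unknown compression mode %d' % mode)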
1816 def _chunks(self, revs, df=None, targetsize=None):
1821 def _chunks(self, revs, df=None, targetsize=None):
1817 """Obtain decompressed chunks for the specified revisions.
1822 """Obtain decompressed chunks for the specified revisions.
1818
1823
1819 Accepts an iterable of numeric revisions that are assumed to be in
1824 Accepts an iterable of numeric revisions that are assumed to be in
1820 ascending order. Also accepts an optional already-open file handle
1825 ascending order. Also accepts an optional already-open file handle
1821 to be used for reading. If used, the seek position of the file will
1826 to be used for reading. If used, the seek position of the file will
1822 not be preserved.
1827 not be preserved.
1823
1828
1824 This function is similar to calling ``self._chunk()`` multiple times,
1829 This function is similar to calling ``self._chunk()`` multiple times,
1825 but is faster.
1830 but is faster.
1826
1831
1827 Returns a list with decompressed data for each requested revision.
1832 Returns a list with decompressed data for each requested revision.
1828 """
1833 """
1829 if not revs:
1834 if not revs:
1830 return []
1835 return []
1831 start = self.start
1836 start = self.start
1832 length = self.length
1837 length = self.length
1833 inline = self._inline
1838 inline = self._inline
1834 iosize = self.index.entry_size
1839 iosize = self.index.entry_size
1835 buffer = util.buffer
1840 buffer = util.buffer
1836
1841
1837 l = []
1842 l = []
1838 ladd = l.append
1843 ladd = l.append
1839
1844
1840 if not self._withsparseread:
1845 if not self._withsparseread:
1841 slicedchunks = (revs,)
1846 slicedchunks = (revs,)
1842 else:
1847 else:
1843 slicedchunks = deltautil.slicechunk(
1848 slicedchunks = deltautil.slicechunk(
1844 self, revs, targetsize=targetsize
1849 self, revs, targetsize=targetsize
1845 )
1850 )
1846
1851
1847 for revschunk in slicedchunks:
1852 for revschunk in slicedchunks:
1848 firstrev = revschunk[0]
1853 firstrev = revschunk[0]
1849 # Skip trailing revisions with empty diff
1854 # Skip trailing revisions with empty diff
1850 for lastrev in revschunk[::-1]:
1855 for lastrev in revschunk[::-1]:
1851 if length(lastrev) != 0:
1856 if length(lastrev) != 0:
1852 break
1857 break
1853
1858
1854 try:
1859 try:
1855 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1860 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1856 except OverflowError:
1861 except OverflowError:
1857 # issue4215 - we can't cache a run of chunks greater than
1862 # issue4215 - we can't cache a run of chunks greater than
1858 # 2G on Windows
1863 # 2G on Windows
1859 return [self._chunk(rev, df=df) for rev in revschunk]
1864 return [self._chunk(rev, df=df) for rev in revschunk]
1860
1865
1861 decomp = self.decompress
1866 decomp = self.decompress
1862 # self._decompressor might be None, but will not be used in that case
1867 # self._decompressor might be None, but will not be used in that case
1863 def_decomp = self._decompressor
1868 def_decomp = self._decompressor
1864 for rev in revschunk:
1869 for rev in revschunk:
1865 chunkstart = start(rev)
1870 chunkstart = start(rev)
1866 if inline:
1871 if inline:
1867 chunkstart += (rev + 1) * iosize
1872 chunkstart += (rev + 1) * iosize
1868 chunklength = length(rev)
1873 chunklength = length(rev)
1869 comp_mode = self.index[rev][10]
1874 comp_mode = self.index[rev][10]
1870 c = buffer(data, chunkstart - offset, chunklength)
1875 c = buffer(data, chunkstart - offset, chunklength)
1871 if comp_mode == COMP_MODE_PLAIN:
1876 if comp_mode == COMP_MODE_PLAIN:
1872 ladd(c)
1877 ladd(c)
1873 elif comp_mode == COMP_MODE_INLINE:
1878 elif comp_mode == COMP_MODE_INLINE:
1874 ladd(decomp(c))
1879 ladd(decomp(c))
1875 elif comp_mode == COMP_MODE_DEFAULT:
1880 elif comp_mode == COMP_MODE_DEFAULT:
1876 ladd(def_decomp(c))
1881 ladd(def_decomp(c))
1877 else:
1882 else:
1878 msg = 'unknown compression mode %d'
1883 msg = 'unknown compression mode %d'
1879 msg %= comp_mode
1884 msg %= comp_mode
1880 raise error.RevlogError(msg)
1885 raise error.RevlogError(msg)
1881
1886
1882 return l
1887 return l
1883
1888
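# A toy illustration (not Mercurial's deltautil.slicechunk) of why
# _chunks() slices the revision list when sparse reads are enabled:
# revisions that sit close together on disk are grouped so each group is
# fetched with one contiguous read, while large gaps are skipped instead
# of being read and thrown away. The gap threshold is an arbitrary
# example value.
def naive_slicechunk(revs, start, length, max_gap=64):
    group = [revs[0]]
    for prev, rev in zip(revs, revs[1:]):
        gap = start(rev) - (start(prev) + length(prev))
        if gap > max_gap:
            yield group  # flush: reading across the gap would waste I/O
            group = []
        group.append(rev)
    yield group

starts = {0: 0, 1: 30, 2: 1000, 3: 1040}
lengths = {0: 30, 1: 20, 2: 40, 3: 10}
print(list(naive_slicechunk([0, 1, 2, 3], starts.get, lengths.get)))
# -> [[0, 1], [2, 3]]  (the 950-byte gap splits the read in two)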
1884 def _chunkclear(self):
1889 def _chunkclear(self):
1885 """Clear the raw chunk cache."""
1890 """Clear the raw chunk cache."""
1886 self._chunkcache = (0, b'')
1891 self._chunkcache = (0, b'')
1887
1892
1888 def deltaparent(self, rev):
1893 def deltaparent(self, rev):
1889 """return deltaparent of the given revision"""
1894 """return deltaparent of the given revision"""
1890 base = self.index[rev][3]
1895 base = self.index[rev][3]
1891 if base == rev:
1896 if base == rev:
1892 return nullrev
1897 return nullrev
1893 elif self._generaldelta:
1898 elif self._generaldelta:
1894 return base
1899 return base
1895 else:
1900 else:
1896 return rev - 1
1901 return rev - 1
1897
1902
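# A self-contained sketch of the deltaparent() rule above. With
# generaldelta, the base revision stored in the index is the delta parent;
# in the older layout every delta is implicitly chained against the
# previous revision. `index_base` stands in for self.index[rev][3] and is
# an assumption of this sketch.
def delta_parent(rev, index_base, generaldelta, nullrev=-1):
    base = index_base[rev]
    if base == rev:      # stored as a full snapshot: no delta parent
        return nullrev
    if generaldelta:
        return base      # explicit base recorded in the index
    return rev - 1       # legacy layout: delta against the previous rev

print(delta_parent(5, {5: 3}, generaldelta=True))   # -> 3
print(delta_parent(5, {5: 3}, generaldelta=False))  # -> 4
print(delta_parent(5, {5: 5}, generaldelta=True))   # -> -1 (snapshot)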
1898 def issnapshot(self, rev):
1903 def issnapshot(self, rev):
1899 """tells whether rev is a snapshot"""
1904 """tells whether rev is a snapshot"""
1900 if not self._sparserevlog:
1905 if not self._sparserevlog:
1901 return self.deltaparent(rev) == nullrev
1906 return self.deltaparent(rev) == nullrev
1902 elif util.safehasattr(self.index, b'issnapshot'):
1907 elif util.safehasattr(self.index, b'issnapshot'):
1903 # directly assign the method to cache the attribute test and lookup
1908 # directly assign the method to cache the attribute test and lookup
1904 self.issnapshot = self.index.issnapshot
1909 self.issnapshot = self.index.issnapshot
1905 return self.issnapshot(rev)
1910 return self.issnapshot(rev)
1906 if rev == nullrev:
1911 if rev == nullrev:
1907 return True
1912 return True
1908 entry = self.index[rev]
1913 entry = self.index[rev]
1909 base = entry[3]
1914 base = entry[3]
1910 if base == rev:
1915 if base == rev:
1911 return True
1916 return True
1912 if base == nullrev:
1917 if base == nullrev:
1913 return True
1918 return True
1914 p1 = entry[5]
1919 p1 = entry[5]
1915 p2 = entry[6]
1920 p2 = entry[6]
1916 if base == p1 or base == p2:
1921 if base == p1 or base == p2:
1917 return False
1922 return False
1918 return self.issnapshot(base)
1923 return self.issnapshot(base)
1919
1924
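# An illustrative model of the pure-Python issnapshot() fallback above. A
# revision is a snapshot when its chain of bases ends without ever
# deltaing against one of its own parents: a delta against p1/p2 marks an
# ordinary delta, while a base that is itself a snapshot makes this an
# intermediate snapshot. `entries` maps rev -> (base, p1, p2) and is a
# made-up stand-in for index tuple fields 3, 5 and 6.
NULLREV = -1

def is_snapshot(rev, entries):
    if rev == NULLREV:
        return True
    base, p1, p2 = entries[rev]
    if base == rev or base == NULLREV:
        return True               # full snapshot stored as-is
    if base == p1 or base == p2:
        return False              # plain delta against a parent
    return is_snapshot(base, entries)  # intermediate snapshot chain

entries = {0: (0, NULLREV, NULLREV),   # level-0 snapshot
           1: (0, 0, NULLREV),         # delta against its parent
           2: (0, 1, NULLREV)}         # delta against a non-parent base
print(is_snapshot(2, entries))         # -> True (intermediate snapshot)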
1920 def snapshotdepth(self, rev):
1925 def snapshotdepth(self, rev):
1921 """number of snapshot in the chain before this one"""
1926 """number of snapshot in the chain before this one"""
1922 if not self.issnapshot(rev):
1927 if not self.issnapshot(rev):
1923 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1928 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1924 return len(self._deltachain(rev)[0]) - 1
1929 return len(self._deltachain(rev)[0]) - 1
1925
1930
1926 def revdiff(self, rev1, rev2):
1931 def revdiff(self, rev1, rev2):
1927 """return or calculate a delta between two revisions
1932 """return or calculate a delta between two revisions
1928
1933
1929 The delta calculated is in binary form and is intended to be written to
1934 The delta calculated is in binary form and is intended to be written to
1930 revlog data directly. So this function needs raw revision data.
1935 revlog data directly. So this function needs raw revision data.
1931 """
1936 """
1932 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1937 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1933 return bytes(self._chunk(rev2))
1938 return bytes(self._chunk(rev2))
1934
1939
1935 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1940 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1936
1941
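# A standalone sketch of the fast path in revdiff(): when rev2's stored
# delta is already expressed against rev1, the raw on-disk chunk can be
# returned directly instead of recomputing a diff of both full texts.
# `delta_parent`, `stored_delta`, `full_text` and `compute_diff` are
# illustrative stand-ins for the revlog internals (deltaparent, _chunk,
# rawdata and mdiff.textdiff respectively).
def rev_diff(rev1, rev2, delta_parent, stored_delta, full_text,
             compute_diff, nullrev=-1):
    if rev1 != nullrev and delta_parent(rev2) == rev1:
        return stored_delta(rev2)  # reuse the delta already on disk
    return compute_diff(full_text(rev1), full_text(rev2))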
1937 def _processflags(self, text, flags, operation, raw=False):
1942 def _processflags(self, text, flags, operation, raw=False):
1938 """deprecated entry point to access flag processors"""
1943 """deprecated entry point to access flag processors"""
1939 msg = b'_processflags(...) use the specialized variant'
1944 msg = b'_processflags(...) use the specialized variant'
1940 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1945 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1941 if raw:
1946 if raw:
1942 return text, flagutil.processflagsraw(self, text, flags)
1947 return text, flagutil.processflagsraw(self, text, flags)
1943 elif operation == b'read':
1948 elif operation == b'read':
1944 return flagutil.processflagsread(self, text, flags)
1949 return flagutil.processflagsread(self, text, flags)
1945 else: # write operation
1950 else: # write operation
1946 return flagutil.processflagswrite(self, text, flags)
1951 return flagutil.processflagswrite(self, text, flags)
1947
1952
1948 def revision(self, nodeorrev, _df=None, raw=False):
1953 def revision(self, nodeorrev, _df=None, raw=False):
1949 """return an uncompressed revision of a given node or revision
1954 """return an uncompressed revision of a given node or revision
1950 number.
1955 number.
1951
1956
1952 _df - an existing file handle to read from. (internal-only)
1957 _df - an existing file handle to read from. (internal-only)
1953 raw - an optional argument specifying if the revision data is to be
1958 raw - an optional argument specifying if the revision data is to be
1954 treated as raw data when applying flag transforms. 'raw' should be set
1959 treated as raw data when applying flag transforms. 'raw' should be set
1955 to True when generating changegroups or in debug commands.
1960 to True when generating changegroups or in debug commands.
1956 """
1961 """
1957 if raw:
1962 if raw:
1958 msg = (
1963 msg = (
1959 b'revlog.revision(..., raw=True) is deprecated, '
1964 b'revlog.revision(..., raw=True) is deprecated, '
1960 b'use revlog.rawdata(...)'
1965 b'use revlog.rawdata(...)'
1961 )
1966 )
1962 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1967 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1963 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1968 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1964
1969
1965 def sidedata(self, nodeorrev, _df=None):
1970 def sidedata(self, nodeorrev, _df=None):
1966 """a map of extra data related to the changeset but not part of the hash
1971 """a map of extra data related to the changeset but not part of the hash
1967
1972
1968 This function currently returns a dictionary. However, a more
1973 This function currently returns a dictionary. However, a more
1969 advanced mapping object will likely be used in the future for
1974 advanced mapping object will likely be used in the future for
1970 more efficient/lazy code.
1975 more efficient/lazy code.
1971 """
1976 """
1972 return self._revisiondata(nodeorrev, _df)[1]
1977 return self._revisiondata(nodeorrev, _df)[1]
1973
1978
1974 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1979 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1975 # deal with <nodeorrev> argument type
1980 # deal with <nodeorrev> argument type
1976 if isinstance(nodeorrev, int):
1981 if isinstance(nodeorrev, int):
1977 rev = nodeorrev
1982 rev = nodeorrev
1978 node = self.node(rev)
1983 node = self.node(rev)
1979 else:
1984 else:
1980 node = nodeorrev
1985 node = nodeorrev
1981 rev = None
1986 rev = None
1982
1987
1983 # fast path the special `nullid` rev
1988 # fast path the special `nullid` rev
1984 if node == self.nullid:
1989 if node == self.nullid:
1985 return b"", {}
1990 return b"", {}
1986
1991
1987 # ``rawtext`` is the text as stored inside the revlog. Might be the
1992 # ``rawtext`` is the text as stored inside the revlog. Might be the
1988 # revision or might need to be processed to retrieve the revision.
1993 # revision or might need to be processed to retrieve the revision.
1989 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1994 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1990
1995
1991 if self.hassidedata:
1996 if self.hassidedata:
1992 if rev is None:
1997 if rev is None:
1993 rev = self.rev(node)
1998 rev = self.rev(node)
1994 sidedata = self._sidedata(rev)
1999 sidedata = self._sidedata(rev)
1995 else:
2000 else:
1996 sidedata = {}
2001 sidedata = {}
1997
2002
1998 if raw and validated:
2003 if raw and validated:
1999 # if we don't want to process the raw text and the raw
2004 # if we don't want to process the raw text and the raw
2000 # text is cached, we can exit early.
2005 # text is cached, we can exit early.
2001 return rawtext, sidedata
2006 return rawtext, sidedata
2002 if rev is None:
2007 if rev is None:
2003 rev = self.rev(node)
2008 rev = self.rev(node)
2004 # the revlog's flags for this revision
2009 # the revlog's flags for this revision
2005 # (these usually alter its state or content)
2010 # (these usually alter its state or content)
2006 flags = self.flags(rev)
2011 flags = self.flags(rev)
2007
2012
2008 if validated and flags == REVIDX_DEFAULT_FLAGS:
2013 if validated and flags == REVIDX_DEFAULT_FLAGS:
2009 # no extra flags set, no flag processor runs, text = rawtext
2014 # no extra flags set, no flag processor runs, text = rawtext
2010 return rawtext, sidedata
2015 return rawtext, sidedata
2011
2016
2012 if raw:
2017 if raw:
2013 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2018 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2014 text = rawtext
2019 text = rawtext
2015 else:
2020 else:
2016 r = flagutil.processflagsread(self, rawtext, flags)
2021 r = flagutil.processflagsread(self, rawtext, flags)
2017 text, validatehash = r
2022 text, validatehash = r
2018 if validatehash:
2023 if validatehash:
2019 self.checkhash(text, node, rev=rev)
2024 self.checkhash(text, node, rev=rev)
2020 if not validated:
2025 if not validated:
2021 self._revisioncache = (node, rev, rawtext)
2026 self._revisioncache = (node, rev, rawtext)
2022
2027
2023 return text, sidedata
2028 return text, sidedata
2024
2029
2025 def _rawtext(self, node, rev, _df=None):
2030 def _rawtext(self, node, rev, _df=None):
2026 """return the possibly unvalidated rawtext for a revision
2031 """return the possibly unvalidated rawtext for a revision
2027
2032
2028 returns (rev, rawtext, validated)
2033 returns (rev, rawtext, validated)
2029 """
2034 """
2030
2035
2031 # revision in the cache (could be useful to apply delta)
2036 # revision in the cache (could be useful to apply delta)
2032 cachedrev = None
2037 cachedrev = None
2033 # An intermediate text to apply deltas to
2038 # An intermediate text to apply deltas to
2034 basetext = None
2039 basetext = None
2035
2040
2036 # Check if we have the entry in cache
2041 # Check if we have the entry in cache
2037 # The cache entry looks like (node, rev, rawtext)
2042 # The cache entry looks like (node, rev, rawtext)
2038 if self._revisioncache:
2043 if self._revisioncache:
2039 if self._revisioncache[0] == node:
2044 if self._revisioncache[0] == node:
2040 return (rev, self._revisioncache[2], True)
2045 return (rev, self._revisioncache[2], True)
2041 cachedrev = self._revisioncache[1]
2046 cachedrev = self._revisioncache[1]
2042
2047
2043 if rev is None:
2048 if rev is None:
2044 rev = self.rev(node)
2049 rev = self.rev(node)
2045
2050
2046 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2051 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2047 if stopped:
2052 if stopped:
2048 basetext = self._revisioncache[2]
2053 basetext = self._revisioncache[2]
2049
2054
2050 # drop cache to save memory, the caller is expected to
2055 # drop cache to save memory, the caller is expected to
2051 # update self._revisioncache after validating the text
2056 # update self._revisioncache after validating the text
2052 self._revisioncache = None
2057 self._revisioncache = None
2053
2058
2054 targetsize = None
2059 targetsize = None
2055 rawsize = self.index[rev][2]
2060 rawsize = self.index[rev][2]
2056 if 0 <= rawsize:
2061 if 0 <= rawsize:
2057 targetsize = 4 * rawsize
2062 targetsize = 4 * rawsize
2058
2063
2059 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2064 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2060 if basetext is None:
2065 if basetext is None:
2061 basetext = bytes(bins[0])
2066 basetext = bytes(bins[0])
2062 bins = bins[1:]
2067 bins = bins[1:]
2063
2068
2064 rawtext = mdiff.patches(basetext, bins)
2069 rawtext = mdiff.patches(basetext, bins)
2065 del basetext # let us have a chance to free memory early
2070 del basetext # let us have a chance to free memory early
2066 return (rev, rawtext, False)
2071 return (rev, rawtext, False)
2067
2072
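# A self-contained model of the reconstruction performed by _rawtext()
# above: walk back to the nearest full snapshot (or to a cached revision
# that stopped the chain), then apply the chain of deltas in order.
# apply_delta() is a trivial stand-in for mdiff.patches; real deltas are
# binary patches, not full replacements.
def apply_delta(base, delta):
    return delta  # toy model: every delta fully replaces the text

def build_rawtext(chain, chunks, cached_text=None):
    """`chain` is base-first; `chunks` maps rev -> decompressed chunk."""
    if cached_text is not None:
        text = cached_text       # the chain was truncated at the cache
    else:
        text = chunks[chain[0]]  # first entry is the full snapshot
        chain = chain[1:]
    for rev in chain:
        text = apply_delta(text, chunks[rev])
    return text

print(build_rawtext([0, 1, 2], {0: b'base', 1: b'v1', 2: b'v2'}))  # b'v2'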
2068 def _sidedata(self, rev):
2073 def _sidedata(self, rev):
2069 """Return the sidedata for a given revision number."""
2074 """Return the sidedata for a given revision number."""
2070 index_entry = self.index[rev]
2075 index_entry = self.index[rev]
2071 sidedata_offset = index_entry[8]
2076 sidedata_offset = index_entry[8]
2072 sidedata_size = index_entry[9]
2077 sidedata_size = index_entry[9]
2073
2078
2074 if self._inline:
2079 if self._inline:
2075 sidedata_offset += self.index.entry_size * (1 + rev)
2080 sidedata_offset += self.index.entry_size * (1 + rev)
2076 if sidedata_size == 0:
2081 if sidedata_size == 0:
2077 return {}
2082 return {}
2078
2083
2079 comp_segment = self._getsegment(sidedata_offset, sidedata_size)
2084 comp_segment = self._getsegment(sidedata_offset, sidedata_size)
2080 comp = self.index[rev][11]
2085 comp = self.index[rev][11]
2081 if comp == COMP_MODE_PLAIN:
2086 if comp == COMP_MODE_PLAIN:
2082 segment = comp_segment
2087 segment = comp_segment
2083 elif comp == COMP_MODE_DEFAULT:
2088 elif comp == COMP_MODE_DEFAULT:
2084 segment = self._decompressor(comp_segment)
2089 segment = self._decompressor(comp_segment)
2085 elif comp == COMP_MODE_INLINE:
2090 elif comp == COMP_MODE_INLINE:
2086 segment = self.decompress(comp_segment)
2091 segment = self.decompress(comp_segment)
2087 else:
2092 else:
2088 msg = 'unknown compression mode %d'
2093 msg = 'unknown compression mode %d'
2089 msg %= comp
2094 msg %= comp
2090 raise error.RevlogError(msg)
2095 raise error.RevlogError(msg)
2091
2096
2092 sidedata = sidedatautil.deserialize_sidedata(segment)
2097 sidedata = sidedatautil.deserialize_sidedata(segment)
2093 return sidedata
2098 return sidedata
2094
2099
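# An illustrative recap of the inline-offset arithmetic in _sidedata()
# above. In an inline revlog the index and data are interleaved in one
# file, so a logical data offset must be shifted by the size of the
# (rev + 1) index entries that precede it. The 64-byte entry size is an
# example value, not a guaranteed constant.
def inline_offset(data_offset, rev, entry_size=64):
    return data_offset + entry_size * (1 + rev)

print(inline_offset(0, 0))    # -> 64: rev 0's data follows its entry
print(inline_offset(100, 2))  # -> 292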
2095 def rawdata(self, nodeorrev, _df=None):
2100 def rawdata(self, nodeorrev, _df=None):
2096 """return an uncompressed raw data of a given node or revision number.
2101 """return an uncompressed raw data of a given node or revision number.
2097
2102
2098 _df - an existing file handle to read from. (internal-only)
2103 _df - an existing file handle to read from. (internal-only)
2099 """
2104 """
2100 return self._revisiondata(nodeorrev, _df, raw=True)[0]
2105 return self._revisiondata(nodeorrev, _df, raw=True)[0]
2101
2106
2102 def hash(self, text, p1, p2):
2107 def hash(self, text, p1, p2):
2103 """Compute a node hash.
2108 """Compute a node hash.
2104
2109
2105 Available as a function so that subclasses can replace the hash
2110 Available as a function so that subclasses can replace the hash
2106 as needed.
2111 as needed.
2107 """
2112 """
2108 return storageutil.hashrevisionsha1(text, p1, p2)
2113 return storageutil.hashrevisionsha1(text, p1, p2)
2109
2114
2110 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2115 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2111 """Check node hash integrity.
2116 """Check node hash integrity.
2112
2117
2113 Available as a function so that subclasses can extend hash mismatch
2118 Available as a function so that subclasses can extend hash mismatch
2114 behaviors as needed.
2119 behaviors as needed.
2115 """
2120 """
2116 try:
2121 try:
2117 if p1 is None and p2 is None:
2122 if p1 is None and p2 is None:
2118 p1, p2 = self.parents(node)
2123 p1, p2 = self.parents(node)
2119 if node != self.hash(text, p1, p2):
2124 if node != self.hash(text, p1, p2):
2120 # Clear the revision cache on hash failure. The revision cache
2125 # Clear the revision cache on hash failure. The revision cache
2121 # only stores the raw revision and clearing the cache does have
2126 # only stores the raw revision and clearing the cache does have
2122 # the side-effect that we won't have a cache hit when the raw
2127 # the side-effect that we won't have a cache hit when the raw
2123 # revision data is accessed. But this case should be rare and
2128 # revision data is accessed. But this case should be rare and
2124 # it is extra work to teach the cache about the hash
2129 # it is extra work to teach the cache about the hash
2125 # verification state.
2130 # verification state.
2126 if self._revisioncache and self._revisioncache[0] == node:
2131 if self._revisioncache and self._revisioncache[0] == node:
2127 self._revisioncache = None
2132 self._revisioncache = None
2128
2133
2129 revornode = rev
2134 revornode = rev
2130 if revornode is None:
2135 if revornode is None:
2131 revornode = templatefilters.short(hex(node))
2136 revornode = templatefilters.short(hex(node))
2132 raise error.RevlogError(
2137 raise error.RevlogError(
2133 _(b"integrity check failed on %s:%s")
2138 _(b"integrity check failed on %s:%s")
2134 % (self.display_id, pycompat.bytestr(revornode))
2139 % (self.display_id, pycompat.bytestr(revornode))
2135 )
2140 )
2136 except error.RevlogError:
2141 except error.RevlogError:
2137 if self._censorable and storageutil.iscensoredtext(text):
2142 if self._censorable and storageutil.iscensoredtext(text):
2138 raise error.CensoredNodeError(self.display_id, node, text)
2143 raise error.CensoredNodeError(self.display_id, node, text)
2139 raise
2144 raise
2140
2145
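# A standalone sketch of the SHA-1 node computation behind hash() and
# checkhash() above (storageutil.hashrevisionsha1): the two parent nodes
# are sorted and hashed before the text, so a node commits to both the
# content and its position in the graph, and parent order does not matter.
import hashlib

def hash_revision_sha1(text, p1, p2):
    s = hashlib.sha1()
    a, b = sorted([p1, p2])
    s.update(a)
    s.update(b)
    s.update(text)
    return s.digest()

nullid = b'\0' * 20
node = hash_revision_sha1(b'file content\n', nullid, nullid)
print(node.hex())  # a stable 40-character node id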
2141 def _enforceinlinesize(self, tr):
2146 def _enforceinlinesize(self, tr):
2142 """Check if the revlog is too big for inline and convert if so.
2147 """Check if the revlog is too big for inline and convert if so.
2143
2148
2144 This should be called after revisions are added to the revlog. If the
2149 This should be called after revisions are added to the revlog. If the
2145 revlog has grown too large to be an inline revlog, it will convert it
2150 revlog has grown too large to be an inline revlog, it will convert it
2146 to use multiple index and data files.
2151 to use multiple index and data files.
2147 """
2152 """
2148 tiprev = len(self) - 1
2153 tiprev = len(self) - 1
2149 total_size = self.start(tiprev) + self.length(tiprev)
2154 total_size = self.start(tiprev) + self.length(tiprev)
2150 if not self._inline or total_size < _maxinline:
2155 if not self._inline or total_size < _maxinline:
2151 return
2156 return
2152
2157
2153 troffset = tr.findoffset(self._indexfile)
2158 troffset = tr.findoffset(self._indexfile)
2154 if troffset is None:
2159 if troffset is None:
2155 raise error.RevlogError(
2160 raise error.RevlogError(
2156 _(b"%s not found in the transaction") % self._indexfile
2161 _(b"%s not found in the transaction") % self._indexfile
2157 )
2162 )
2158 trindex = 0
2163 trindex = 0
2159 tr.add(self._datafile, 0)
2164 tr.add(self._datafile, 0)
2160
2165
2161 existing_handles = False
2166 existing_handles = False
2162 if self._writinghandles is not None:
2167 if self._writinghandles is not None:
2163 existing_handles = True
2168 existing_handles = True
2164 fp = self._writinghandles[0]
2169 fp = self._writinghandles[0]
2165 fp.flush()
2170 fp.flush()
2166 fp.close()
2171 fp.close()
2167 # We can't use the cached file handle after close(). So prevent
2172 # We can't use the cached file handle after close(). So prevent
2168 # its usage.
2173 # its usage.
2169 self._writinghandles = None
2174 self._writinghandles = None
2170
2175
2171 new_dfh = self._datafp(b'w+')
2176 new_dfh = self._datafp(b'w+')
2172 new_dfh.truncate(0) # drop any potentially existing data
2177 new_dfh.truncate(0) # drop any potentially existing data
2173 try:
2178 try:
2174 with self._indexfp() as read_ifh:
2179 with self._indexfp() as read_ifh:
2175 for r in self:
2180 for r in self:
2176 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2181 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2177 if troffset <= self.start(r) + r * self.index.entry_size:
2182 if troffset <= self.start(r) + r * self.index.entry_size:
2178 trindex = r
2183 trindex = r
2179 new_dfh.flush()
2184 new_dfh.flush()
2180
2185
2181 with self.__index_new_fp() as fp:
2186 with self.__index_new_fp() as fp:
2182 self._format_flags &= ~FLAG_INLINE_DATA
2187 self._format_flags &= ~FLAG_INLINE_DATA
2183 self._inline = False
2188 self._inline = False
2184 for i in self:
2189 for i in self:
2185 e = self.index.entry_binary(i)
2190 e = self.index.entry_binary(i)
2186 if i == 0 and self._docket is None:
2191 if i == 0 and self._docket is None:
2187 header = self._format_flags | self._format_version
2192 header = self._format_flags | self._format_version
2188 header = self.index.pack_header(header)
2193 header = self.index.pack_header(header)
2189 e = header + e
2194 e = header + e
2190 fp.write(e)
2195 fp.write(e)
2191 if self._docket is not None:
2196 if self._docket is not None:
2192 self._docket.index_end = fp.tell()
2197 self._docket.index_end = fp.tell()
2193
2198
2194 # There is a small transactional race here. If the rename of
2199 # There is a small transactional race here. If the rename of
2195 # the index fails, we should remove the datafile. It is more
2200 # the index fails, we should remove the datafile. It is more
2196 # important to ensure that the data file is not truncated
2201 # important to ensure that the data file is not truncated
2197 # when the index is replaced as otherwise data is lost.
2202 # when the index is replaced as otherwise data is lost.
2198 tr.replace(self._datafile, self.start(trindex))
2203 tr.replace(self._datafile, self.start(trindex))
2199
2204
2200 # the temp file replaces the real index when we exit the context
2205 # the temp file replaces the real index when we exit the context
2201 # manager
2206 # manager
2202
2207
2203 tr.replace(self._indexfile, trindex * self.index.entry_size)
2208 tr.replace(self._indexfile, trindex * self.index.entry_size)
2204 nodemaputil.setup_persistent_nodemap(tr, self)
2209 nodemaputil.setup_persistent_nodemap(tr, self)
2205 self._chunkclear()
2210 self._chunkclear()
2206
2211
2207 if existing_handles:
2212 if existing_handles:
2208 # switched from inline to conventional; reopen the index
2213 # switched from inline to conventional; reopen the index
2209 ifh = self.__index_write_fp()
2214 ifh = self.__index_write_fp()
2210 self._writinghandles = (ifh, new_dfh)
2215 self._writinghandles = (ifh, new_dfh)
2211 new_dfh = None
2216 new_dfh = None
2212 finally:
2217 finally:
2213 if new_dfh is not None:
2218 if new_dfh is not None:
2214 new_dfh.close()
2219 new_dfh.close()
2215
2220
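# A toy standalone model of the inline-to-split conversion performed by
# _enforceinlinesize() above. An inline revlog interleaves
# [entry0, data0, entry1, data1, ...] in the .i file; splitting copies
# the data segments to a .d file and rewrites the index without them.
# The 8-byte entries and in-memory "files" are deliberate simplifications.
def split_inline(inline, lengths, entry_size=8):
    index, data, pos = [], [], 0
    for length in lengths:
        index.append(inline[pos:pos + entry_size])
        pos += entry_size
        data.append(inline[pos:pos + length])
        pos += length
    return b''.join(index), b''.join(data)

inline = b'E0000000' + b'aa' + b'E1111111' + b'bbb'
print(split_inline(inline, [2, 3]))
# -> (b'E0000000E1111111', b'aabbb')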
2216 def _nodeduplicatecallback(self, transaction, node):
2221 def _nodeduplicatecallback(self, transaction, node):
2217 """called when trying to add a node already stored."""
2222 """called when trying to add a node already stored."""
2218
2223
2219 @contextlib.contextmanager
2224 @contextlib.contextmanager
2220 def _writing(self, transaction):
2225 def _writing(self, transaction):
2221 if self._trypending:
2226 if self._trypending:
2222 msg = b'try to write in a `trypending` revlog: %s'
2227 msg = b'try to write in a `trypending` revlog: %s'
2223 msg %= self.display_id
2228 msg %= self.display_id
2224 raise error.ProgrammingError(msg)
2229 raise error.ProgrammingError(msg)
2225 if self._writinghandles is not None:
2230 if self._writinghandles is not None:
2226 yield
2231 yield
2227 else:
2232 else:
2228 r = len(self)
2233 r = len(self)
2229 dsize = 0
2234 dsize = 0
2230 if r:
2235 if r:
2231 dsize = self.end(r - 1)
2236 dsize = self.end(r - 1)
2232 dfh = None
2237 dfh = None
2233 if not self._inline:
2238 if not self._inline:
2234 try:
2239 try:
2235 dfh = self._datafp(b"r+")
2240 dfh = self._datafp(b"r+")
2236 if self._docket is None:
2241 if self._docket is None:
2237 dfh.seek(0, os.SEEK_END)
2242 dfh.seek(0, os.SEEK_END)
2238 else:
2243 else:
2239 dfh.seek(self._docket.data_end, os.SEEK_SET)
2244 dfh.seek(self._docket.data_end, os.SEEK_SET)
2240 except IOError as inst:
2245 except IOError as inst:
2241 if inst.errno != errno.ENOENT:
2246 if inst.errno != errno.ENOENT:
2242 raise
2247 raise
2243 dfh = self._datafp(b"w+")
2248 dfh = self._datafp(b"w+")
2244 transaction.add(self._datafile, dsize)
2249 transaction.add(self._datafile, dsize)
2245 try:
2250 try:
2246 isize = r * self.index.entry_size
2251 isize = r * self.index.entry_size
2247 ifh = self.__index_write_fp()
2252 ifh = self.__index_write_fp()
2248 if self._inline:
2253 if self._inline:
2249 transaction.add(self._indexfile, dsize + isize)
2254 transaction.add(self._indexfile, dsize + isize)
2250 else:
2255 else:
2251 transaction.add(self._indexfile, isize)
2256 transaction.add(self._indexfile, isize)
2252 try:
2257 try:
2253 self._writinghandles = (ifh, dfh)
2258 self._writinghandles = (ifh, dfh)
2254 try:
2259 try:
2255 yield
2260 yield
2256 if self._docket is not None:
2261 if self._docket is not None:
2257 self._write_docket(transaction)
2262 self._write_docket(transaction)
2258 finally:
2263 finally:
2259 self._writinghandles = None
2264 self._writinghandles = None
2260 finally:
2265 finally:
2261 ifh.close()
2266 ifh.close()
2262 finally:
2267 finally:
2263 if dfh is not None:
2268 if dfh is not None:
2264 dfh.close()
2269 dfh.close()
2265
2270
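# A hedged sketch of how the _writing() context manager above is meant to
# be used by callers in this module (addrawrevision earlier, addgroup
# below): all writes happen while both handles are open, and the docket
# (when present) is written on exit.
#
#     with self._writing(transaction):
#         for chunk in incoming:
#             self._addrevision(...)
#
# Because re-entry simply yields, addgroup() can wrap many _addrevision()
# calls in a single open/seek/close cycle instead of paying that cost
# once per revision.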
2266 def _write_docket(self, transaction):
2271 def _write_docket(self, transaction):
2267 """write the current docket on disk
2272 """write the current docket on disk
2268
2273
2269 Exists as a method to help the changelog implement transaction logic
2274 Exists as a method to help the changelog implement transaction logic
2270
2275
2271 We could also imagine using the same transaction logic for all revlogs
2276 We could also imagine using the same transaction logic for all revlogs
2272 since dockets are cheap."""
2277 since dockets are cheap."""
2273 self._docket.write(transaction)
2278 self._docket.write(transaction)
2274
2279
2275 def addrevision(
2280 def addrevision(
2276 self,
2281 self,
2277 text,
2282 text,
2278 transaction,
2283 transaction,
2279 link,
2284 link,
2280 p1,
2285 p1,
2281 p2,
2286 p2,
2282 cachedelta=None,
2287 cachedelta=None,
2283 node=None,
2288 node=None,
2284 flags=REVIDX_DEFAULT_FLAGS,
2289 flags=REVIDX_DEFAULT_FLAGS,
2285 deltacomputer=None,
2290 deltacomputer=None,
2286 sidedata=None,
2291 sidedata=None,
2287 ):
2292 ):
2288 """add a revision to the log
2293 """add a revision to the log
2289
2294
2290 text - the revision data to add
2295 text - the revision data to add
2291 transaction - the transaction object used for rollback
2296 transaction - the transaction object used for rollback
2292 link - the linkrev data to add
2297 link - the linkrev data to add
2293 p1, p2 - the parent nodeids of the revision
2298 p1, p2 - the parent nodeids of the revision
2294 cachedelta - an optional precomputed delta
2299 cachedelta - an optional precomputed delta
2295 node - nodeid of revision; typically node is not specified, and it is
2300 node - nodeid of revision; typically node is not specified, and it is
2296 computed by default as hash(text, p1, p2), however subclasses might
2301 computed by default as hash(text, p1, p2), however subclasses might
2297 use a different hashing method (and override checkhash() in such a case)
2302 use a different hashing method (and override checkhash() in such a case)
2298 flags - the known flags to set on the revision
2303 flags - the known flags to set on the revision
2299 deltacomputer - an optional deltacomputer instance shared between
2304 deltacomputer - an optional deltacomputer instance shared between
2300 multiple calls
2305 multiple calls
2301 """
2306 """
2302 if link == nullrev:
2307 if link == nullrev:
2303 raise error.RevlogError(
2308 raise error.RevlogError(
2304 _(b"attempted to add linkrev -1 to %s") % self.display_id
2309 _(b"attempted to add linkrev -1 to %s") % self.display_id
2305 )
2310 )
2306
2311
2307 if sidedata is None:
2312 if sidedata is None:
2308 sidedata = {}
2313 sidedata = {}
2309 elif sidedata and not self.hassidedata:
2314 elif sidedata and not self.hassidedata:
2310 raise error.ProgrammingError(
2315 raise error.ProgrammingError(
2311 _(b"trying to add sidedata to a revlog who don't support them")
2316 _(b"trying to add sidedata to a revlog who don't support them")
2312 )
2317 )
2313
2318
2314 if flags:
2319 if flags:
2315 node = node or self.hash(text, p1, p2)
2320 node = node or self.hash(text, p1, p2)
2316
2321
2317 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2322 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2318
2323
2319 # If the flag processor modifies the revision data, ignore any provided
2324 # If the flag processor modifies the revision data, ignore any provided
2320 # cachedelta.
2325 # cachedelta.
2321 if rawtext != text:
2326 if rawtext != text:
2322 cachedelta = None
2327 cachedelta = None
2323
2328
2324 if len(rawtext) > _maxentrysize:
2329 if len(rawtext) > _maxentrysize:
2325 raise error.RevlogError(
2330 raise error.RevlogError(
2326 _(
2331 _(
2327 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2332 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2328 )
2333 )
2329 % (self.display_id, len(rawtext))
2334 % (self.display_id, len(rawtext))
2330 )
2335 )
2331
2336
2332 node = node or self.hash(rawtext, p1, p2)
2337 node = node or self.hash(rawtext, p1, p2)
2333 rev = self.index.get_rev(node)
2338 rev = self.index.get_rev(node)
2334 if rev is not None:
2339 if rev is not None:
2335 return rev
2340 return rev
2336
2341
2337 if validatehash:
2342 if validatehash:
2338 self.checkhash(rawtext, node, p1=p1, p2=p2)
2343 self.checkhash(rawtext, node, p1=p1, p2=p2)
2339
2344
2340 return self.addrawrevision(
2345 return self.addrawrevision(
2341 rawtext,
2346 rawtext,
2342 transaction,
2347 transaction,
2343 link,
2348 link,
2344 p1,
2349 p1,
2345 p2,
2350 p2,
2346 node,
2351 node,
2347 flags,
2352 flags,
2348 cachedelta=cachedelta,
2353 cachedelta=cachedelta,
2349 deltacomputer=deltacomputer,
2354 deltacomputer=deltacomputer,
2350 sidedata=sidedata,
2355 sidedata=sidedata,
2351 )
2356 )
2352
2357
2353 def addrawrevision(
2358 def addrawrevision(
2354 self,
2359 self,
2355 rawtext,
2360 rawtext,
2356 transaction,
2361 transaction,
2357 link,
2362 link,
2358 p1,
2363 p1,
2359 p2,
2364 p2,
2360 node,
2365 node,
2361 flags,
2366 flags,
2362 cachedelta=None,
2367 cachedelta=None,
2363 deltacomputer=None,
2368 deltacomputer=None,
2364 sidedata=None,
2369 sidedata=None,
2365 ):
2370 ):
2366 """add a raw revision with known flags, node and parents
2371 """add a raw revision with known flags, node and parents
2367 useful when reusing a revision not stored in this revlog (e.g. received
2372 useful when reusing a revision not stored in this revlog (e.g. received
2368 over the wire, or read from an external bundle).
2373 over the wire, or read from an external bundle).
2369 """
2374 """
2370 with self._writing(transaction):
2375 with self._writing(transaction):
2371 return self._addrevision(
2376 return self._addrevision(
2372 node,
2377 node,
2373 rawtext,
2378 rawtext,
2374 transaction,
2379 transaction,
2375 link,
2380 link,
2376 p1,
2381 p1,
2377 p2,
2382 p2,
2378 flags,
2383 flags,
2379 cachedelta,
2384 cachedelta,
2380 deltacomputer=deltacomputer,
2385 deltacomputer=deltacomputer,
2381 sidedata=sidedata,
2386 sidedata=sidedata,
2382 )
2387 )
2383
2388
2384 def compress(self, data):
2389 def compress(self, data):
2385 """Generate a possibly-compressed representation of data."""
2390 """Generate a possibly-compressed representation of data."""
2386 if not data:
2391 if not data:
2387 return b'', data
2392 return b'', data
2388
2393
2389 compressed = self._compressor.compress(data)
2394 compressed = self._compressor.compress(data)
2390
2395
2391 if compressed:
2396 if compressed:
2392 # The revlog compressor added the header in the returned data.
2397 # The revlog compressor added the header in the returned data.
2393 return b'', compressed
2398 return b'', compressed
2394
2399
2395 if data[0:1] == b'\0':
2400 if data[0:1] == b'\0':
2396 return b'', data
2401 return b'', data
2397 return b'u', data
2402 return b'u', data
2398
2403
2399 def decompress(self, data):
2404 def decompress(self, data):
2400 """Decompress a revlog chunk.
2405 """Decompress a revlog chunk.
2401
2406
2402 The chunk is expected to begin with a header identifying the
2407 The chunk is expected to begin with a header identifying the
2403 format type so it can be routed to an appropriate decompressor.
2408 format type so it can be routed to an appropriate decompressor.
2404 """
2409 """
2405 if not data:
2410 if not data:
2406 return data
2411 return data
2407
2412
2408 # Revlogs are read much more frequently than they are written and many
2413 # Revlogs are read much more frequently than they are written and many
2409 # chunks only take microseconds to decompress, so performance is
2414 # chunks only take microseconds to decompress, so performance is
2410 # important here.
2415 # important here.
2411 #
2416 #
2412 # We can make a few assumptions about revlogs:
2417 # We can make a few assumptions about revlogs:
2413 #
2418 #
2414 # 1) the majority of chunks will be compressed (as opposed to inline
2419 # 1) the majority of chunks will be compressed (as opposed to inline
2415 # raw data).
2420 # raw data).
2416 # 2) decompressing *any* data will likely be at least 10x slower than
2421 # 2) decompressing *any* data will likely be at least 10x slower than
2417 # returning raw inline data.
2422 # returning raw inline data.
2418 # 3) we want to prioritize common and officially supported compression
2423 # 3) we want to prioritize common and officially supported compression
2419 # engines
2424 # engines
2420 #
2425 #
2421 # It follows that we want to optimize for "decompress compressed data
2426 # It follows that we want to optimize for "decompress compressed data
2422 # when encoded with common and officially supported compression engines"
2427 # when encoded with common and officially supported compression engines"
2423 # case over "raw data" and "data encoded by less common or non-official
2428 # case over "raw data" and "data encoded by less common or non-official
2424 # compression engines." That is why we have the inline lookup first
2429 # compression engines." That is why we have the inline lookup first
2425 # followed by the compengines lookup.
2430 # followed by the compengines lookup.
2426 #
2431 #
2427 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2432 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2428 # compressed chunks. And this matters for changelog and manifest reads.
2433 # compressed chunks. And this matters for changelog and manifest reads.
2429 t = data[0:1]
2434 t = data[0:1]
2430
2435
2431 if t == b'x':
2436 if t == b'x':
2432 try:
2437 try:
2433 return _zlibdecompress(data)
2438 return _zlibdecompress(data)
2434 except zlib.error as e:
2439 except zlib.error as e:
2435 raise error.RevlogError(
2440 raise error.RevlogError(
2436 _(b'revlog decompress error: %s')
2441 _(b'revlog decompress error: %s')
2437 % stringutil.forcebytestr(e)
2442 % stringutil.forcebytestr(e)
2438 )
2443 )
2439 # '\0' is more common than 'u' so it goes first.
2444 # '\0' is more common than 'u' so it goes first.
2440 elif t == b'\0':
2445 elif t == b'\0':
2441 return data
2446 return data
2442 elif t == b'u':
2447 elif t == b'u':
2443 return util.buffer(data, 1)
2448 return util.buffer(data, 1)
2444
2449
2445 compressor = self._get_decompressor(t)
2450 compressor = self._get_decompressor(t)
2446
2451
2447 return compressor.decompress(data)
2452 return compressor.decompress(data)
2448
2453
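# A self-contained illustration of the chunk-header dispatch implemented
# by compress()/decompress() above: b'u' marks data stored uncompressed,
# b'\0' (or an empty chunk) is returned as-is, and b'x' happens to be the
# first byte zlib emits, so it routes to zlib. Other header bytes select
# pluggable engines (e.g. zstd) via _get_decompressor().
import zlib

def decode(data):
    t = data[0:1]
    if t == b'x':
        return zlib.decompress(data)
    if not data or t == b'\0':
        return data      # empty or plain chunk stored verbatim
    if t == b'u':
        return data[1:]  # explicit "uncompressed" marker
    raise ValueError('unknown compression header %r' % t)

payload = zlib.compress(b'some revision text' * 4)
assert payload[:1] == b'x'  # zlib streams start with 0x78 ('x')
assert decode(payload) == b'some revision text' * 4
assert decode(b'u' + b'raw') == b'raw'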
2449 def _addrevision(
2454 def _addrevision(
2450 self,
2455 self,
2451 node,
2456 node,
2452 rawtext,
2457 rawtext,
2453 transaction,
2458 transaction,
2454 link,
2459 link,
2455 p1,
2460 p1,
2456 p2,
2461 p2,
2457 flags,
2462 flags,
2458 cachedelta,
2463 cachedelta,
2459 alwayscache=False,
2464 alwayscache=False,
2460 deltacomputer=None,
2465 deltacomputer=None,
2461 sidedata=None,
2466 sidedata=None,
2462 ):
2467 ):
2463 """internal function to add revisions to the log
2468 """internal function to add revisions to the log
2464
2469
2465 see addrevision for argument descriptions.
2470 see addrevision for argument descriptions.
2466
2471
2467 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2472 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2468
2473
2469 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2474 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2470 be used.
2475 be used.
2471
2476
2472 invariants:
2477 invariants:
2473 - rawtext is optional (can be None); if not set, cachedelta must be set.
2478 - rawtext is optional (can be None); if not set, cachedelta must be set.
2474 if both are set, they must correspond to each other.
2479 if both are set, they must correspond to each other.
2475 """
2480 """
2476 if node == self.nullid:
2481 if node == self.nullid:
2477 raise error.RevlogError(
2482 raise error.RevlogError(
2478 _(b"%s: attempt to add null revision") % self.display_id
2483 _(b"%s: attempt to add null revision") % self.display_id
2479 )
2484 )
2480 if (
2485 if (
2481 node == self.nodeconstants.wdirid
2486 node == self.nodeconstants.wdirid
2482 or node in self.nodeconstants.wdirfilenodeids
2487 or node in self.nodeconstants.wdirfilenodeids
2483 ):
2488 ):
2484 raise error.RevlogError(
2489 raise error.RevlogError(
2485 _(b"%s: attempt to add wdir revision") % self.display_id
2490 _(b"%s: attempt to add wdir revision") % self.display_id
2486 )
2491 )
2487 if self._writinghandles is None:
2492 if self._writinghandles is None:
2488 msg = b'adding revision outside `revlog._writing` context'
2493 msg = b'adding revision outside `revlog._writing` context'
2489 raise error.ProgrammingError(msg)
2494 raise error.ProgrammingError(msg)
2490
2495
2491 if self._inline:
2496 if self._inline:
2492 fh = self._writinghandles[0]
2497 fh = self._writinghandles[0]
2493 else:
2498 else:
2494 fh = self._writinghandles[1]
2499 fh = self._writinghandles[1]
2495
2500
2496 btext = [rawtext]
2501 btext = [rawtext]
2497
2502
2498 curr = len(self)
2503 curr = len(self)
2499 prev = curr - 1
2504 prev = curr - 1
2500
2505
2501 offset = self._get_data_offset(prev)
2506 offset = self._get_data_offset(prev)
2502
2507
2503 if self._concurrencychecker:
2508 if self._concurrencychecker:
2504 ifh, dfh = self._writinghandles
2509 ifh, dfh = self._writinghandles
2505 if self._inline:
2510 if self._inline:
2506 # offset is "as if" it were in the .d file, so we need to add on
2511 # offset is "as if" it were in the .d file, so we need to add on
2507 # the size of the entry metadata.
2512 # the size of the entry metadata.
2508 self._concurrencychecker(
2513 self._concurrencychecker(
2509 ifh, self._indexfile, offset + curr * self.index.entry_size
2514 ifh, self._indexfile, offset + curr * self.index.entry_size
2510 )
2515 )
2511 else:
2516 else:
2512 # Entries in the .i are a consistent size.
2517 # Entries in the .i are a consistent size.
2513 self._concurrencychecker(
2518 self._concurrencychecker(
2514 ifh, self._indexfile, curr * self.index.entry_size
2519 ifh, self._indexfile, curr * self.index.entry_size
2515 )
2520 )
2516 self._concurrencychecker(dfh, self._datafile, offset)
2521 self._concurrencychecker(dfh, self._datafile, offset)
2517
2522
2518 p1r, p2r = self.rev(p1), self.rev(p2)
2523 p1r, p2r = self.rev(p1), self.rev(p2)
2519
2524
2520 # full versions are inserted when the needed deltas
2525 # full versions are inserted when the needed deltas
2521 # become comparable to the uncompressed text
2526 # become comparable to the uncompressed text
2522 if rawtext is None:
2527 if rawtext is None:
2523 # need rawtext size, before changed by flag processors, which is
2528 # need rawtext size, before changed by flag processors, which is
2524 # the non-raw size. use revlog explicitly to avoid filelog's extra
2529 # the non-raw size. use revlog explicitly to avoid filelog's extra
2525 # logic that might remove metadata size.
2530 # logic that might remove metadata size.
2526 textlen = mdiff.patchedsize(
2531 textlen = mdiff.patchedsize(
2527 revlog.size(self, cachedelta[0]), cachedelta[1]
2532 revlog.size(self, cachedelta[0]), cachedelta[1]
2528 )
2533 )
2529 else:
2534 else:
2530 textlen = len(rawtext)
2535 textlen = len(rawtext)
2531
2536
2532 if deltacomputer is None:
2537 if deltacomputer is None:
2533 deltacomputer = deltautil.deltacomputer(self)
2538 deltacomputer = deltautil.deltacomputer(self)
2534
2539
2535 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2540 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2536
2541
2537 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2542 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2538
2543
2539 compression_mode = COMP_MODE_INLINE
2544 compression_mode = COMP_MODE_INLINE
2540 if self._docket is not None:
2545 if self._docket is not None:
2541 h, d = deltainfo.data
2546 h, d = deltainfo.data
2542 if not h and not d:
2547 if not h and not d:
2543 # no data to store at all... declare it uncompressed
2548 # no data to store at all... declare it uncompressed
2544 compression_mode = COMP_MODE_PLAIN
2549 compression_mode = COMP_MODE_PLAIN
2545 elif not h:
2550 elif not h:
2546 t = d[0:1]
2551 t = d[0:1]
2547 if t == b'\0':
2552 if t == b'\0':
2548 compression_mode = COMP_MODE_PLAIN
2553 compression_mode = COMP_MODE_PLAIN
2549 elif t == self._docket.default_compression_header:
2554 elif t == self._docket.default_compression_header:
2550 compression_mode = COMP_MODE_DEFAULT
2555 compression_mode = COMP_MODE_DEFAULT
2551 elif h == b'u':
2556 elif h == b'u':
2552 # we have a more efficient way to declare uncompressed
2557 # we have a more efficient way to declare uncompressed
2553 h = b''
2558 h = b''
2554 compression_mode = COMP_MODE_PLAIN
2559 compression_mode = COMP_MODE_PLAIN
2555 deltainfo = deltautil.drop_u_compression(deltainfo)
2560 deltainfo = deltautil.drop_u_compression(deltainfo)
2556
2561
2557 sidedata_compression_mode = COMP_MODE_INLINE
2562 sidedata_compression_mode = COMP_MODE_INLINE
2558 if sidedata and self.hassidedata:
2563 if sidedata and self.hassidedata:
2559 sidedata_compression_mode = COMP_MODE_PLAIN
2564 sidedata_compression_mode = COMP_MODE_PLAIN
2560 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2565 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2561 sidedata_offset = offset + deltainfo.deltalen
2566 sidedata_offset = offset + deltainfo.deltalen
2562 h, comp_sidedata = self.compress(serialized_sidedata)
2567 h, comp_sidedata = self.compress(serialized_sidedata)
2563 if (
2568 if (
2564 h != b'u'
2569 h != b'u'
2565 and comp_sidedata[0:1] != b'\0'
2570 and comp_sidedata[0:1] != b'\0'
2566 and len(comp_sidedata) < len(serialized_sidedata)
2571 and len(comp_sidedata) < len(serialized_sidedata)
2567 ):
2572 ):
2568 assert not h
2573 assert not h
2569 if (
2574 if (
2570 comp_sidedata[0:1]
2575 comp_sidedata[0:1]
2571 == self._docket.default_compression_header
2576 == self._docket.default_compression_header
2572 ):
2577 ):
2573 sidedata_compression_mode = COMP_MODE_DEFAULT
2578 sidedata_compression_mode = COMP_MODE_DEFAULT
2574 serialized_sidedata = comp_sidedata
2579 serialized_sidedata = comp_sidedata
2575 else:
2580 else:
2576 sidedata_compression_mode = COMP_MODE_INLINE
2581 sidedata_compression_mode = COMP_MODE_INLINE
2577 serialized_sidedata = comp_sidedata
2582 serialized_sidedata = comp_sidedata
2578 else:
2583 else:
2579 serialized_sidedata = b""
2584 serialized_sidedata = b""
2580 # Don't store the offset if the sidedata is empty, that way
2585 # Don't store the offset if the sidedata is empty, that way
2581 # we can easily detect empty sidedata, and it will be no different
2586 # we can easily detect empty sidedata, and it will be no different
2582 # from sidedata we add manually.
2587 # from sidedata we add manually.
2583 sidedata_offset = 0
2588 sidedata_offset = 0
2584
2589
2585 e = (
2590 e = (
2586 offset_type(offset, flags),
2591 offset_type(offset, flags),
2587 deltainfo.deltalen,
2592 deltainfo.deltalen,
2588 textlen,
2593 textlen,
2589 deltainfo.base,
2594 deltainfo.base,
2590 link,
2595 link,
2591 p1r,
2596 p1r,
2592 p2r,
2597 p2r,
2593 node,
2598 node,
2594 sidedata_offset,
2599 sidedata_offset,
2595 len(serialized_sidedata),
2600 len(serialized_sidedata),
2596 compression_mode,
2601 compression_mode,
2597 sidedata_compression_mode,
2602 sidedata_compression_mode,
2598 )
2603 )
2599
2604
2600 self.index.append(e)
2605 self.index.append(e)
2601 entry = self.index.entry_binary(curr)
2606 entry = self.index.entry_binary(curr)
2602 if curr == 0 and self._docket is None:
2607 if curr == 0 and self._docket is None:
2603 header = self._format_flags | self._format_version
2608 header = self._format_flags | self._format_version
2604 header = self.index.pack_header(header)
2609 header = self.index.pack_header(header)
2605 entry = header + entry
2610 entry = header + entry
2606 self._writeentry(
2611 self._writeentry(
2607 transaction,
2612 transaction,
2608 entry,
2613 entry,
2609 deltainfo.data,
2614 deltainfo.data,
2610 link,
2615 link,
2611 offset,
2616 offset,
2612 serialized_sidedata,
2617 serialized_sidedata,
2613 )
2618 )
2614
2619
2615 rawtext = btext[0]
2620 rawtext = btext[0]
2616
2621
2617 if alwayscache and rawtext is None:
2622 if alwayscache and rawtext is None:
2618 rawtext = deltacomputer.buildtext(revinfo, fh)
2623 rawtext = deltacomputer.buildtext(revinfo, fh)
2619
2624
2620 if type(rawtext) == bytes: # only accept immutable objects
2625 if type(rawtext) == bytes: # only accept immutable objects
2621 self._revisioncache = (node, curr, rawtext)
2626 self._revisioncache = (node, curr, rawtext)
2622 self._chainbasecache[curr] = deltainfo.chainbase
2627 self._chainbasecache[curr] = deltainfo.chainbase
2623 return curr
2628 return curr
2624
2629
2625 def _get_data_offset(self, prev):
2630 def _get_data_offset(self, prev):
2626 """Returns the current offset in the (in-transaction) data file.
2631 """Returns the current offset in the (in-transaction) data file.
2627 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2632 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2628 file to store that information: since sidedata can be rewritten to the
2633 file to store that information: since sidedata can be rewritten to the
2629 end of the data file within a transaction, you can have cases where, for
2634 end of the data file within a transaction, you can have cases where, for
2630 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2635 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2631 to `n - 1`'s sidedata being written after `n`'s data.
2636 to `n - 1`'s sidedata being written after `n`'s data.
2632
2637
2633 TODO cache this in a docket file before getting out of experimental."""
2638 TODO cache this in a docket file before getting out of experimental."""
2634 if self._docket is None:
2639 if self._docket is None:
2635 return self.end(prev)
2640 return self.end(prev)
2636 else:
2641 else:
2637 return self._docket.data_end
2642 return self._docket.data_end
2638
2643
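# A small sketch of the two offset sources described by _get_data_offset()
# above: without a docket the next write position is derived from
# end(prev); with a docket (revlog v2) data_end is tracked explicitly,
# because sidedata rewrites can append data out of revision order.
def next_data_offset(prev_end, docket_data_end=None):
    if docket_data_end is None:
        return prev_end        # derived in O(1) from the index
    return docket_data_end     # authoritative in-transaction position

print(next_data_offset(4096))        # -> 4096
print(next_data_offset(4096, 8192))  # -> 8192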
2639 def _writeentry(self, transaction, entry, data, link, offset, sidedata):
2644 def _writeentry(self, transaction, entry, data, link, offset, sidedata):
2640 # Files opened in a+ mode have inconsistent behavior on various
2645 # Files opened in a+ mode have inconsistent behavior on various
2641 # platforms. Windows requires that a file positioning call be made
2646 # platforms. Windows requires that a file positioning call be made
2642 # when the file handle transitions between reads and writes. See
2647 # when the file handle transitions between reads and writes. See
2643 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2648 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2644 # platforms, Python or the platform itself can be buggy. Some versions
2649 # platforms, Python or the platform itself can be buggy. Some versions
2645 # of Solaris have been observed to not append at the end of the file
2650 # of Solaris have been observed to not append at the end of the file
2646 # if the file was seeked to before the end. See issue4943 for more.
2651 # if the file was seeked to before the end. See issue4943 for more.
2647 #
2652 #
2648 # We work around this issue by inserting a seek() before writing.
2653 # We work around this issue by inserting a seek() before writing.
2649 # Note: This is likely not necessary on Python 3. However, because
2654 # Note: This is likely not necessary on Python 3. However, because
2650 # the file handle is reused for reads and may be seeked there, we need
2655 # the file handle is reused for reads and may be seeked there, we need
2651 # to be careful before changing this.
2656 # to be careful before changing this.
2652 if self._writinghandles is None:
2657 if self._writinghandles is None:
2653 msg = b'adding revision outside `revlog._writing` context'
2658 msg = b'adding revision outside `revlog._writing` context'
2654 raise error.ProgrammingError(msg)
2659 raise error.ProgrammingError(msg)
2655 ifh, dfh = self._writinghandles
2660 ifh, dfh = self._writinghandles
2656 if self._docket is None:
2661 if self._docket is None:
2657 ifh.seek(0, os.SEEK_END)
2662 ifh.seek(0, os.SEEK_END)
2658 else:
2663 else:
2659 ifh.seek(self._docket.index_end, os.SEEK_SET)
2664 ifh.seek(self._docket.index_end, os.SEEK_SET)
2660 if dfh:
2665 if dfh:
2661 if self._docket is None:
2666 if self._docket is None:
2662 dfh.seek(0, os.SEEK_END)
2667 dfh.seek(0, os.SEEK_END)
2663 else:
2668 else:
2664 dfh.seek(self._docket.data_end, os.SEEK_SET)
2669 dfh.seek(self._docket.data_end, os.SEEK_SET)
2665
2670
2666 curr = len(self) - 1
2671 curr = len(self) - 1
2667 if not self._inline:
2672 if not self._inline:
2668 transaction.add(self._datafile, offset)
2673 transaction.add(self._datafile, offset)
2669 transaction.add(self._indexfile, curr * len(entry))
2674 transaction.add(self._indexfile, curr * len(entry))
2670 if data[0]:
2675 if data[0]:
2671 dfh.write(data[0])
2676 dfh.write(data[0])
2672 dfh.write(data[1])
2677 dfh.write(data[1])
2673 if sidedata:
2678 if sidedata:
2674 dfh.write(sidedata)
2679 dfh.write(sidedata)
2675 ifh.write(entry)
2680 ifh.write(entry)
2676 else:
2681 else:
2677 offset += curr * self.index.entry_size
2682 offset += curr * self.index.entry_size
2678 transaction.add(self._indexfile, offset)
2683 transaction.add(self._indexfile, offset)
2679 ifh.write(entry)
2684 ifh.write(entry)
2680 ifh.write(data[0])
2685 ifh.write(data[0])
2681 ifh.write(data[1])
2686 ifh.write(data[1])
2682 if sidedata:
2687 if sidedata:
2683 ifh.write(sidedata)
2688 ifh.write(sidedata)
2684 self._enforceinlinesize(transaction)
2689 self._enforceinlinesize(transaction)
2685 if self._docket is not None:
2690 if self._docket is not None:
2686 self._docket.index_end = self._writinghandles[0].tell()
2691 self._docket.index_end = self._writinghandles[0].tell()
2687 self._docket.data_end = self._writinghandles[1].tell()
2692 self._docket.data_end = self._writinghandles[1].tell()
2688
2693
2689 nodemaputil.setup_persistent_nodemap(transaction, self)
2694 nodemaputil.setup_persistent_nodemap(transaction, self)
2690
2695
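# An illustrative recap of the inline bookkeeping at the end of
# _writeentry() above: for an inline revlog the transaction offset must
# also account for the `curr` index entries interleaved before the data,
# i.e. offset_in_file = data_offset + curr * entry_size, after which
# _enforceinlinesize() may split the file. The numbers are made up.
def inline_file_offset(data_offset, curr, entry_size=64):
    return data_offset + curr * entry_size

# third revision (curr == 2) whose data starts at logical offset 150:
print(inline_file_offset(150, 2))  # -> 278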
2691 def addgroup(
2696 def addgroup(
2692 self,
2697 self,
2693 deltas,
2698 deltas,
2694 linkmapper,
2699 linkmapper,
2695 transaction,
2700 transaction,
2696 alwayscache=False,
2701 alwayscache=False,
2697 addrevisioncb=None,
2702 addrevisioncb=None,
2698 duplicaterevisioncb=None,
2703 duplicaterevisioncb=None,
2699 ):
2704 ):
2700 """
2705 """
2701 add a delta group
2706 add a delta group
2702
2707
2703 Given a set of deltas, add them to the revision log. The
2708 Given a set of deltas, add them to the revision log. The
2704 first delta is against its parent, which should be in our
2709 first delta is against its parent, which should be in our
2705 log; the rest are against the previous delta.
2710 log; the rest are against the previous delta.
2706
2711
2707 If ``addrevisioncb`` is defined, it will be called with arguments of
2712 If ``addrevisioncb`` is defined, it will be called with arguments of
2708 this revlog and the node that was added.
2713 this revlog and the node that was added.
2709 """
2714 """
2710
2715
2711 if self._adding_group:
2716 if self._adding_group:
2712 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2717 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2713
2718
2714 self._adding_group = True
2719 self._adding_group = True
2715 empty = True
2720 empty = True
2716 try:
2721 try:
2717 with self._writing(transaction):
2722 with self._writing(transaction):
2718 deltacomputer = deltautil.deltacomputer(self)
2723 deltacomputer = deltautil.deltacomputer(self)
2719 # loop through our set of deltas
2724 # loop through our set of deltas
2720 for data in deltas:
2725 for data in deltas:
2721 (
2726 (
2722 node,
2727 node,
2723 p1,
2728 p1,
2724 p2,
2729 p2,
2725 linknode,
2730 linknode,
2726 deltabase,
2731 deltabase,
2727 delta,
2732 delta,
2728 flags,
2733 flags,
2729 sidedata,
2734 sidedata,
2730 ) = data
2735 ) = data
2731 link = linkmapper(linknode)
2736 link = linkmapper(linknode)
2732 flags = flags or REVIDX_DEFAULT_FLAGS
2737 flags = flags or REVIDX_DEFAULT_FLAGS
2733
2738
2734 rev = self.index.get_rev(node)
2739 rev = self.index.get_rev(node)
2735 if rev is not None:
2740 if rev is not None:
2736 # this can happen if two branches make the same change
2741 # this can happen if two branches make the same change
2737 self._nodeduplicatecallback(transaction, rev)
2742 self._nodeduplicatecallback(transaction, rev)
2738 if duplicaterevisioncb:
2743 if duplicaterevisioncb:
2739 duplicaterevisioncb(self, rev)
2744 duplicaterevisioncb(self, rev)
2740 empty = False
2745 empty = False
2741 continue
2746 continue
2742
2747
2743 for p in (p1, p2):
2748 for p in (p1, p2):
2744 if not self.index.has_node(p):
2749 if not self.index.has_node(p):
2745 raise error.LookupError(
2750 raise error.LookupError(
2746 p, self.radix, _(b'unknown parent')
2751 p, self.radix, _(b'unknown parent')
2747 )
2752 )
2748
2753
2749 if not self.index.has_node(deltabase):
2754 if not self.index.has_node(deltabase):
2750 raise error.LookupError(
2755 raise error.LookupError(
2751 deltabase, self.display_id, _(b'unknown delta base')
2756 deltabase, self.display_id, _(b'unknown delta base')
2752 )
2757 )
2753
2758
2754 baserev = self.rev(deltabase)
2759 baserev = self.rev(deltabase)
2755
2760
2756 if baserev != nullrev and self.iscensored(baserev):
2761 if baserev != nullrev and self.iscensored(baserev):
2757 # if base is censored, delta must be full replacement in a
2762 # if base is censored, delta must be full replacement in a
2758 # single patch operation
2763 # single patch operation
2759 hlen = struct.calcsize(b">lll")
2764 hlen = struct.calcsize(b">lll")
2760 oldlen = self.rawsize(baserev)
2765 oldlen = self.rawsize(baserev)
2761 newlen = len(delta) - hlen
2766 newlen = len(delta) - hlen
2762 if delta[:hlen] != mdiff.replacediffheader(
2767 if delta[:hlen] != mdiff.replacediffheader(
2763 oldlen, newlen
2768 oldlen, newlen
2764 ):
2769 ):
2765 raise error.CensoredBaseError(
2770 raise error.CensoredBaseError(
2766 self.display_id, self.node(baserev)
2771 self.display_id, self.node(baserev)
2767 )
2772 )
2768
2773
2769 if not flags and self._peek_iscensored(baserev, delta):
2774 if not flags and self._peek_iscensored(baserev, delta):
2770 flags |= REVIDX_ISCENSORED
2775 flags |= REVIDX_ISCENSORED
2771
2776
2772 # We assume consumers of addrevisioncb will want to retrieve
2777 # We assume consumers of addrevisioncb will want to retrieve
2773 # the added revision, which will require a call to
2778 # the added revision, which will require a call to
2774 # revision(). revision() will fast path if there is a cache
2779 # revision(). revision() will fast path if there is a cache
2775 # hit. So, we tell _addrevision() to always cache in this case.
2780 # hit. So, we tell _addrevision() to always cache in this case.
2776 # We're only using addgroup() in the context of changegroup
2781 # We're only using addgroup() in the context of changegroup
2777 # generation so the revision data can always be handled as raw
2782 # generation so the revision data can always be handled as raw
2778 # by the flagprocessor.
2783 # by the flagprocessor.
2779 rev = self._addrevision(
2784 rev = self._addrevision(
2780 node,
2785 node,
2781 None,
2786 None,
2782 transaction,
2787 transaction,
2783 link,
2788 link,
2784 p1,
2789 p1,
2785 p2,
2790 p2,
2786 flags,
2791 flags,
2787 (baserev, delta),
2792 (baserev, delta),
2788 alwayscache=alwayscache,
2793 alwayscache=alwayscache,
2789 deltacomputer=deltacomputer,
2794 deltacomputer=deltacomputer,
2790 sidedata=sidedata,
2795 sidedata=sidedata,
2791 )
2796 )
2792
2797
2793 if addrevisioncb:
2798 if addrevisioncb:
2794 addrevisioncb(self, rev)
2799 addrevisioncb(self, rev)
2795 empty = False
2800 empty = False
2796 finally:
2801 finally:
2797 self._adding_group = False
2802 self._adding_group = False
2798 return not empty
2803 return not empty
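
    # A minimal usage sketch for addgroup() (illustrative only, not part of
    # the original source; `rl`, `cl`, `tr`, `deltas`, and `seen` are assumed
    # to be a revlog, the changelog, an open transaction, an iterable of the
    # 8-tuples unpacked in the loop above, and a plain list):
    #
    #   added = rl.addgroup(
    #       deltas,
    #       lambda linknode: cl.rev(linknode),  # linkmapper
    #       tr,
    #       addrevisioncb=lambda store, rev: seen.append(rev),
    #   )
    #   # `added` is True if at least one incoming delta was processed,
    #   # whether it created a new revision or matched an existing one.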

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )
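
    # Illustrative pairing of getstrippoint() and strip() (not part of the
    # original source; `rl`, `tr`, and `minlink` are assumptions):
    #
    #   rev, broken = rl.getstrippoint(minlink)
    #   # `broken` holds revisions below `rev` whose linkrevs point at
    #   # changesets the strip will remove; callers typically save those
    #   # revisions before calling rl.strip(minlink, tr).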

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However, it
            # is not powerful enough at the time of this comment.
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)
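
    # Illustrative reading of checksize() (not part of the original source;
    # `rl` is an assumption):
    #
    #   dd, di = rl.checksize()
    #   # dd/di count trailing bytes in the data/index files beyond what the
    #   # index entries account for; a healthy revlog reports (0, 0), and
    #   # verifyintegrity() below surfaces non-zero values as problems.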

    def files(self):
        res = [self._indexfile]
        if not self._inline:
            res.append(self._datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )
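
    # Illustrative sketch (not part of the original source; `rl` and `nodes`
    # are assumptions):
    #
    #   for rev_delta in rl.emitrevisions(
    #       nodes,
    #       nodesorder=b'storage',
    #       revisiondata=True,
    #       deltamode=repository.CG_DELTAMODE_PREV,
    #   ):
    #       ...  # each item is a revlogrevisiondelta built by storageutil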

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta
        encoding differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy
        can significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means
        if you are converting a non-generaldelta revlog to a generaldelta
        revlog, deltas will be recomputed if the delta's parent isn't a
        parent of the revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. When unset, the destination revlog's current
        setting is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
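
    # Illustrative sketch (not part of the original source; `src`, `dst`, and
    # `tr` are assumed to be a source revlog, an empty destination revlog,
    # and an open transaction):
    #
    #   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
    #   # recomputes every delta in `dst`, e.g. after a delta algorithm
    #   # change; the default DELTAREUSESAMEREVS instead reuses a delta
    #   # whenever `dst` would pick the same base revision.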
3067
3072
3068 def _clone(
3073 def _clone(
3069 self,
3074 self,
3070 tr,
3075 tr,
3071 destrevlog,
3076 destrevlog,
3072 addrevisioncb,
3077 addrevisioncb,
3073 deltareuse,
3078 deltareuse,
3074 forcedeltabothparents,
3079 forcedeltabothparents,
3075 sidedata_helpers,
3080 sidedata_helpers,
3076 ):
3081 ):
3077 """perform the core duty of `revlog.clone` after parameter processing"""
3082 """perform the core duty of `revlog.clone` after parameter processing"""
3078 deltacomputer = deltautil.deltacomputer(destrevlog)
3083 deltacomputer = deltautil.deltacomputer(destrevlog)
3079 index = self.index
3084 index = self.index
3080 for rev in self:
3085 for rev in self:
3081 entry = index[rev]
3086 entry = index[rev]
3082
3087
3083 # Some classes override linkrev to take filtered revs into
3088 # Some classes override linkrev to take filtered revs into
3084 # account. Use raw entry from index.
3089 # account. Use raw entry from index.
3085 flags = entry[0] & 0xFFFF
3090 flags = entry[0] & 0xFFFF
3086 linkrev = entry[4]
3091 linkrev = entry[4]
3087 p1 = index[entry[5]][7]
3092 p1 = index[entry[5]][7]
3088 p2 = index[entry[6]][7]
3093 p2 = index[entry[6]][7]
3089 node = entry[7]
3094 node = entry[7]
3090
3095
3091 # (Possibly) reuse the delta from the revlog if allowed and
3096 # (Possibly) reuse the delta from the revlog if allowed and
3092 # the revlog chunk is a delta.
3097 # the revlog chunk is a delta.
3093 cachedelta = None
3098 cachedelta = None
3094 rawtext = None
3099 rawtext = None
3095 if deltareuse == self.DELTAREUSEFULLADD:
3100 if deltareuse == self.DELTAREUSEFULLADD:
3096 text, sidedata = self._revisiondata(rev)
3101 text, sidedata = self._revisiondata(rev)
3097
3102
3098 if sidedata_helpers is not None:
3103 if sidedata_helpers is not None:
3099 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3104 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3100 self, sidedata_helpers, sidedata, rev
3105 self, sidedata_helpers, sidedata, rev
3101 )
3106 )
3102 flags = flags | new_flags[0] & ~new_flags[1]
3107 flags = flags | new_flags[0] & ~new_flags[1]
3103
3108
3104 destrevlog.addrevision(
3109 destrevlog.addrevision(
3105 text,
3110 text,
3106 tr,
3111 tr,
3107 linkrev,
3112 linkrev,
3108 p1,
3113 p1,
3109 p2,
3114 p2,
3110 cachedelta=cachedelta,
3115 cachedelta=cachedelta,
3111 node=node,
3116 node=node,
3112 flags=flags,
3117 flags=flags,
3113 deltacomputer=deltacomputer,
3118 deltacomputer=deltacomputer,
3114 sidedata=sidedata,
3119 sidedata=sidedata,
3115 )
3120 )
3116 else:
3121 else:
3117 if destrevlog._lazydelta:
3122 if destrevlog._lazydelta:
3118 dp = self.deltaparent(rev)
3123 dp = self.deltaparent(rev)
3119 if dp != nullrev:
3124 if dp != nullrev:
3120 cachedelta = (dp, bytes(self._chunk(rev)))
3125 cachedelta = (dp, bytes(self._chunk(rev)))
3121
3126
3122 sidedata = None
3127 sidedata = None
3123 if not cachedelta:
3128 if not cachedelta:
3124 rawtext, sidedata = self._revisiondata(rev)
3129 rawtext, sidedata = self._revisiondata(rev)
3125 if sidedata is None:
3130 if sidedata is None:
3126 sidedata = self.sidedata(rev)
3131 sidedata = self.sidedata(rev)
3127
3132
3128 if sidedata_helpers is not None:
3133 if sidedata_helpers is not None:
3129 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3134 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3130 self, sidedata_helpers, sidedata, rev
3135 self, sidedata_helpers, sidedata, rev
3131 )
3136 )
3132 flags = flags | new_flags[0] & ~new_flags[1]
3137 flags = flags | new_flags[0] & ~new_flags[1]
3133
3138
3134 with destrevlog._writing(tr):
3139 with destrevlog._writing(tr):
3135 destrevlog._addrevision(
3140 destrevlog._addrevision(
3136 node,
3141 node,
3137 rawtext,
3142 rawtext,
3138 tr,
3143 tr,
3139 linkrev,
3144 linkrev,
3140 p1,
3145 p1,
3141 p2,
3146 p2,
3142 flags,
3147 flags,
3143 cachedelta,
3148 cachedelta,
3144 deltacomputer=deltacomputer,
3149 deltacomputer=deltacomputer,
3145 sidedata=sidedata,
3150 sidedata=sidedata,
3146 )
3151 )
3147
3152
3148 if addrevisioncb:
3153 if addrevisioncb:
3149 addrevisioncb(self, rev, node)
3154 addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.
        #
        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(
            self.opener,
            target=self.target,
            radix=self.radix,
            postfix=b'tmpcensored',
            censorable=True,
        )
        newrl._format_version = self._format_version
        newrl._format_flags = self._format_flags
        newrl._generaldelta = self._generaldelta
        newrl._parse_index = self._parse_index

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self._indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self._datafile, location=b'store')

        self.opener.rename(newrl._indexfile, self._indexfile)
        if not self._inline:
            self.opener.rename(newrl._datafile, self._datafile)

        self.clearcaches()
        self._loadindex()
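
    # Illustrative sketch (not part of the original source; `rl`, `tr`, and
    # `node` are assumptions):
    #
    #   rl.censorrevision(tr, node, tombstone=b'removed for legal reasons')
    #   # the tombstone replaces the raw text of `node`; per the check above,
    #   # it must be no longer than the censored revision's rawsize.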

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta:   file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #   ext:    content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)
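
    # Illustrative consumption of verifyintegrity() (not part of the original
    # source; `rl` and `ui` are assumptions):
    #
    #   state = {b'expectedversion': REVLOGV1, b'erroroncensored': True}
    #   for problem in rl.verifyintegrity(state):
    #       ui.warn((problem.error or problem.warning) + b'\n')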

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d
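
    # Illustrative sketch (not part of the original source; `rl` is an
    # assumption):
    #
    #   info = rl.storageinfo(revisionscount=True, storedsize=True)
    #   # -> {b'revisionscount': len(rl),
    #   #     b'storedsize': <on-disk bytes of the files in rl.files()>}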

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh = self._writinghandles
            if self._docket is not None:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
            else:
                dfh.seek(0, os.SEEK_END)

            current_offset = dfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                dfh.seek(current_offset, os.SEEK_SET)
                dfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            if self._docket is not None:
                self._docket.data_end = dfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)