##// END OF EJS Templates
revlog: simplify the try nesting in the `_writing` context...
marmoute -
r48118:1844a2e3 default
parent child Browse files
Show More
@@ -1,3468 +1,3470 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import contextlib
18 import contextlib
19 import errno
19 import errno
20 import io
20 import io
21 import os
21 import os
22 import struct
22 import struct
23 import zlib
23 import zlib
24
24
25 # import stuff from node for others to import from revlog
25 # import stuff from node for others to import from revlog
26 from .node import (
26 from .node import (
27 bin,
27 bin,
28 hex,
28 hex,
29 nullrev,
29 nullrev,
30 sha1nodeconstants,
30 sha1nodeconstants,
31 short,
31 short,
32 wdirrev,
32 wdirrev,
33 )
33 )
34 from .i18n import _
34 from .i18n import _
35 from .pycompat import getattr
35 from .pycompat import getattr
36 from .revlogutils.constants import (
36 from .revlogutils.constants import (
37 ALL_KINDS,
37 ALL_KINDS,
38 CHANGELOGV2,
38 CHANGELOGV2,
39 COMP_MODE_DEFAULT,
39 COMP_MODE_DEFAULT,
40 COMP_MODE_INLINE,
40 COMP_MODE_INLINE,
41 COMP_MODE_PLAIN,
41 COMP_MODE_PLAIN,
42 FEATURES_BY_VERSION,
42 FEATURES_BY_VERSION,
43 FLAG_GENERALDELTA,
43 FLAG_GENERALDELTA,
44 FLAG_INLINE_DATA,
44 FLAG_INLINE_DATA,
45 INDEX_HEADER,
45 INDEX_HEADER,
46 KIND_CHANGELOG,
46 KIND_CHANGELOG,
47 REVLOGV0,
47 REVLOGV0,
48 REVLOGV1,
48 REVLOGV1,
49 REVLOGV1_FLAGS,
49 REVLOGV1_FLAGS,
50 REVLOGV2,
50 REVLOGV2,
51 REVLOGV2_FLAGS,
51 REVLOGV2_FLAGS,
52 REVLOG_DEFAULT_FLAGS,
52 REVLOG_DEFAULT_FLAGS,
53 REVLOG_DEFAULT_FORMAT,
53 REVLOG_DEFAULT_FORMAT,
54 REVLOG_DEFAULT_VERSION,
54 REVLOG_DEFAULT_VERSION,
55 SUPPORTED_FLAGS,
55 SUPPORTED_FLAGS,
56 )
56 )
57 from .revlogutils.flagutil import (
57 from .revlogutils.flagutil import (
58 REVIDX_DEFAULT_FLAGS,
58 REVIDX_DEFAULT_FLAGS,
59 REVIDX_ELLIPSIS,
59 REVIDX_ELLIPSIS,
60 REVIDX_EXTSTORED,
60 REVIDX_EXTSTORED,
61 REVIDX_FLAGS_ORDER,
61 REVIDX_FLAGS_ORDER,
62 REVIDX_HASCOPIESINFO,
62 REVIDX_HASCOPIESINFO,
63 REVIDX_ISCENSORED,
63 REVIDX_ISCENSORED,
64 REVIDX_RAWTEXT_CHANGING_FLAGS,
64 REVIDX_RAWTEXT_CHANGING_FLAGS,
65 )
65 )
66 from .thirdparty import attr
66 from .thirdparty import attr
67 from . import (
67 from . import (
68 ancestor,
68 ancestor,
69 dagop,
69 dagop,
70 error,
70 error,
71 mdiff,
71 mdiff,
72 policy,
72 policy,
73 pycompat,
73 pycompat,
74 templatefilters,
74 templatefilters,
75 util,
75 util,
76 )
76 )
77 from .interfaces import (
77 from .interfaces import (
78 repository,
78 repository,
79 util as interfaceutil,
79 util as interfaceutil,
80 )
80 )
81 from .revlogutils import (
81 from .revlogutils import (
82 deltas as deltautil,
82 deltas as deltautil,
83 docket as docketutil,
83 docket as docketutil,
84 flagutil,
84 flagutil,
85 nodemap as nodemaputil,
85 nodemap as nodemaputil,
86 revlogv0,
86 revlogv0,
87 sidedata as sidedatautil,
87 sidedata as sidedatautil,
88 )
88 )
89 from .utils import (
89 from .utils import (
90 storageutil,
90 storageutil,
91 stringutil,
91 stringutil,
92 )
92 )
93
93
94 # blanked usage of all the name to prevent pyflakes constraints
94 # blanked usage of all the name to prevent pyflakes constraints
95 # We need these name available in the module for extensions.
95 # We need these name available in the module for extensions.
96
96
97 REVLOGV0
97 REVLOGV0
98 REVLOGV1
98 REVLOGV1
99 REVLOGV2
99 REVLOGV2
100 FLAG_INLINE_DATA
100 FLAG_INLINE_DATA
101 FLAG_GENERALDELTA
101 FLAG_GENERALDELTA
102 REVLOG_DEFAULT_FLAGS
102 REVLOG_DEFAULT_FLAGS
103 REVLOG_DEFAULT_FORMAT
103 REVLOG_DEFAULT_FORMAT
104 REVLOG_DEFAULT_VERSION
104 REVLOG_DEFAULT_VERSION
105 REVLOGV1_FLAGS
105 REVLOGV1_FLAGS
106 REVLOGV2_FLAGS
106 REVLOGV2_FLAGS
107 REVIDX_ISCENSORED
107 REVIDX_ISCENSORED
108 REVIDX_ELLIPSIS
108 REVIDX_ELLIPSIS
109 REVIDX_HASCOPIESINFO
109 REVIDX_HASCOPIESINFO
110 REVIDX_EXTSTORED
110 REVIDX_EXTSTORED
111 REVIDX_DEFAULT_FLAGS
111 REVIDX_DEFAULT_FLAGS
112 REVIDX_FLAGS_ORDER
112 REVIDX_FLAGS_ORDER
113 REVIDX_RAWTEXT_CHANGING_FLAGS
113 REVIDX_RAWTEXT_CHANGING_FLAGS
114
114
115 parsers = policy.importmod('parsers')
115 parsers = policy.importmod('parsers')
116 rustancestor = policy.importrust('ancestor')
116 rustancestor = policy.importrust('ancestor')
117 rustdagop = policy.importrust('dagop')
117 rustdagop = policy.importrust('dagop')
118 rustrevlog = policy.importrust('revlog')
118 rustrevlog = policy.importrust('revlog')
119
119
120 # Aliased for performance.
120 # Aliased for performance.
121 _zlibdecompress = zlib.decompress
121 _zlibdecompress = zlib.decompress
122
122
123 # max size of revlog with inline data
123 # max size of revlog with inline data
124 _maxinline = 131072
124 _maxinline = 131072
125 _chunksize = 1048576
125 _chunksize = 1048576
126
126
127 # Flag processors for REVIDX_ELLIPSIS.
127 # Flag processors for REVIDX_ELLIPSIS.
128 def ellipsisreadprocessor(rl, text):
128 def ellipsisreadprocessor(rl, text):
129 return text, False
129 return text, False
130
130
131
131
132 def ellipsiswriteprocessor(rl, text):
132 def ellipsiswriteprocessor(rl, text):
133 return text, False
133 return text, False
134
134
135
135
136 def ellipsisrawprocessor(rl, text):
136 def ellipsisrawprocessor(rl, text):
137 return False
137 return False
138
138
139
139
140 ellipsisprocessor = (
140 ellipsisprocessor = (
141 ellipsisreadprocessor,
141 ellipsisreadprocessor,
142 ellipsiswriteprocessor,
142 ellipsiswriteprocessor,
143 ellipsisrawprocessor,
143 ellipsisrawprocessor,
144 )
144 )
145
145
146
146
147 def offset_type(offset, type):
147 def offset_type(offset, type):
148 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
148 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
149 raise ValueError(b'unknown revlog index flags')
149 raise ValueError(b'unknown revlog index flags')
150 return int(int(offset) << 16 | type)
150 return int(int(offset) << 16 | type)
151
151
152
152
153 def _verify_revision(rl, skipflags, state, node):
153 def _verify_revision(rl, skipflags, state, node):
154 """Verify the integrity of the given revlog ``node`` while providing a hook
154 """Verify the integrity of the given revlog ``node`` while providing a hook
155 point for extensions to influence the operation."""
155 point for extensions to influence the operation."""
156 if skipflags:
156 if skipflags:
157 state[b'skipread'].add(node)
157 state[b'skipread'].add(node)
158 else:
158 else:
159 # Side-effect: read content and verify hash.
159 # Side-effect: read content and verify hash.
160 rl.revision(node)
160 rl.revision(node)
161
161
162
162
163 # True if a fast implementation for persistent-nodemap is available
163 # True if a fast implementation for persistent-nodemap is available
164 #
164 #
165 # We also consider we have a "fast" implementation in "pure" python because
165 # We also consider we have a "fast" implementation in "pure" python because
166 # people using pure don't really have performance consideration (and a
166 # people using pure don't really have performance consideration (and a
167 # wheelbarrow of other slowness source)
167 # wheelbarrow of other slowness source)
168 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
168 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
169 parsers, 'BaseIndexObject'
169 parsers, 'BaseIndexObject'
170 )
170 )
171
171
172
172
173 @attr.s(slots=True, frozen=True)
173 @attr.s(slots=True, frozen=True)
174 class _revisioninfo(object):
174 class _revisioninfo(object):
175 """Information about a revision that allows building its fulltext
175 """Information about a revision that allows building its fulltext
176 node: expected hash of the revision
176 node: expected hash of the revision
177 p1, p2: parent revs of the revision
177 p1, p2: parent revs of the revision
178 btext: built text cache consisting of a one-element list
178 btext: built text cache consisting of a one-element list
179 cachedelta: (baserev, uncompressed_delta) or None
179 cachedelta: (baserev, uncompressed_delta) or None
180 flags: flags associated to the revision storage
180 flags: flags associated to the revision storage
181
181
182 One of btext[0] or cachedelta must be set.
182 One of btext[0] or cachedelta must be set.
183 """
183 """
184
184
185 node = attr.ib()
185 node = attr.ib()
186 p1 = attr.ib()
186 p1 = attr.ib()
187 p2 = attr.ib()
187 p2 = attr.ib()
188 btext = attr.ib()
188 btext = attr.ib()
189 textlen = attr.ib()
189 textlen = attr.ib()
190 cachedelta = attr.ib()
190 cachedelta = attr.ib()
191 flags = attr.ib()
191 flags = attr.ib()
192
192
193
193
194 @interfaceutil.implementer(repository.irevisiondelta)
194 @interfaceutil.implementer(repository.irevisiondelta)
195 @attr.s(slots=True)
195 @attr.s(slots=True)
196 class revlogrevisiondelta(object):
196 class revlogrevisiondelta(object):
197 node = attr.ib()
197 node = attr.ib()
198 p1node = attr.ib()
198 p1node = attr.ib()
199 p2node = attr.ib()
199 p2node = attr.ib()
200 basenode = attr.ib()
200 basenode = attr.ib()
201 flags = attr.ib()
201 flags = attr.ib()
202 baserevisionsize = attr.ib()
202 baserevisionsize = attr.ib()
203 revision = attr.ib()
203 revision = attr.ib()
204 delta = attr.ib()
204 delta = attr.ib()
205 sidedata = attr.ib()
205 sidedata = attr.ib()
206 protocol_flags = attr.ib()
206 protocol_flags = attr.ib()
207 linknode = attr.ib(default=None)
207 linknode = attr.ib(default=None)
208
208
209
209
210 @interfaceutil.implementer(repository.iverifyproblem)
210 @interfaceutil.implementer(repository.iverifyproblem)
211 @attr.s(frozen=True)
211 @attr.s(frozen=True)
212 class revlogproblem(object):
212 class revlogproblem(object):
213 warning = attr.ib(default=None)
213 warning = attr.ib(default=None)
214 error = attr.ib(default=None)
214 error = attr.ib(default=None)
215 node = attr.ib(default=None)
215 node = attr.ib(default=None)
216
216
217
217
218 def parse_index_v1(data, inline):
218 def parse_index_v1(data, inline):
219 # call the C implementation to parse the index data
219 # call the C implementation to parse the index data
220 index, cache = parsers.parse_index2(data, inline)
220 index, cache = parsers.parse_index2(data, inline)
221 return index, cache
221 return index, cache
222
222
223
223
224 def parse_index_v2(data, inline):
224 def parse_index_v2(data, inline):
225 # call the C implementation to parse the index data
225 # call the C implementation to parse the index data
226 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
226 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
227 return index, cache
227 return index, cache
228
228
229
229
230 def parse_index_cl_v2(data, inline):
230 def parse_index_cl_v2(data, inline):
231 # call the C implementation to parse the index data
231 # call the C implementation to parse the index data
232 assert not inline
232 assert not inline
233 from .pure.parsers import parse_index_cl_v2
233 from .pure.parsers import parse_index_cl_v2
234
234
235 index, cache = parse_index_cl_v2(data)
235 index, cache = parse_index_cl_v2(data)
236 return index, cache
236 return index, cache
237
237
238
238
239 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
239 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
240
240
241 def parse_index_v1_nodemap(data, inline):
241 def parse_index_v1_nodemap(data, inline):
242 index, cache = parsers.parse_index_devel_nodemap(data, inline)
242 index, cache = parsers.parse_index_devel_nodemap(data, inline)
243 return index, cache
243 return index, cache
244
244
245
245
246 else:
246 else:
247 parse_index_v1_nodemap = None
247 parse_index_v1_nodemap = None
248
248
249
249
250 def parse_index_v1_mixed(data, inline):
250 def parse_index_v1_mixed(data, inline):
251 index, cache = parse_index_v1(data, inline)
251 index, cache = parse_index_v1(data, inline)
252 return rustrevlog.MixedIndex(index), cache
252 return rustrevlog.MixedIndex(index), cache
253
253
254
254
255 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
255 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
256 # signed integer)
256 # signed integer)
257 _maxentrysize = 0x7FFFFFFF
257 _maxentrysize = 0x7FFFFFFF
258
258
259
259
260 class revlog(object):
260 class revlog(object):
261 """
261 """
262 the underlying revision storage object
262 the underlying revision storage object
263
263
264 A revlog consists of two parts, an index and the revision data.
264 A revlog consists of two parts, an index and the revision data.
265
265
266 The index is a file with a fixed record size containing
266 The index is a file with a fixed record size containing
267 information on each revision, including its nodeid (hash), the
267 information on each revision, including its nodeid (hash), the
268 nodeids of its parents, the position and offset of its data within
268 nodeids of its parents, the position and offset of its data within
269 the data file, and the revision it's based on. Finally, each entry
269 the data file, and the revision it's based on. Finally, each entry
270 contains a linkrev entry that can serve as a pointer to external
270 contains a linkrev entry that can serve as a pointer to external
271 data.
271 data.
272
272
273 The revision data itself is a linear collection of data chunks.
273 The revision data itself is a linear collection of data chunks.
274 Each chunk represents a revision and is usually represented as a
274 Each chunk represents a revision and is usually represented as a
275 delta against the previous chunk. To bound lookup time, runs of
275 delta against the previous chunk. To bound lookup time, runs of
276 deltas are limited to about 2 times the length of the original
276 deltas are limited to about 2 times the length of the original
277 version data. This makes retrieval of a version proportional to
277 version data. This makes retrieval of a version proportional to
278 its size, or O(1) relative to the number of revisions.
278 its size, or O(1) relative to the number of revisions.
279
279
280 Both pieces of the revlog are written to in an append-only
280 Both pieces of the revlog are written to in an append-only
281 fashion, which means we never need to rewrite a file to insert or
281 fashion, which means we never need to rewrite a file to insert or
282 remove data, and can use some simple techniques to avoid the need
282 remove data, and can use some simple techniques to avoid the need
283 for locking while reading.
283 for locking while reading.
284
284
285 If checkambig, indexfile is opened with checkambig=True at
285 If checkambig, indexfile is opened with checkambig=True at
286 writing, to avoid file stat ambiguity.
286 writing, to avoid file stat ambiguity.
287
287
288 If mmaplargeindex is True, and an mmapindexthreshold is set, the
288 If mmaplargeindex is True, and an mmapindexthreshold is set, the
289 index will be mmapped rather than read if it is larger than the
289 index will be mmapped rather than read if it is larger than the
290 configured threshold.
290 configured threshold.
291
291
292 If censorable is True, the revlog can have censored revisions.
292 If censorable is True, the revlog can have censored revisions.
293
293
294 If `upperboundcomp` is not None, this is the expected maximal gain from
294 If `upperboundcomp` is not None, this is the expected maximal gain from
295 compression for the data content.
295 compression for the data content.
296
296
297 `concurrencychecker` is an optional function that receives 3 arguments: a
297 `concurrencychecker` is an optional function that receives 3 arguments: a
298 file handle, a filename, and an expected position. It should check whether
298 file handle, a filename, and an expected position. It should check whether
299 the current position in the file handle is valid, and log/warn/fail (by
299 the current position in the file handle is valid, and log/warn/fail (by
300 raising).
300 raising).
301
301
302
302
303 Internal details
303 Internal details
304 ----------------
304 ----------------
305
305
306 A large part of the revlog logic deals with revisions' "index entries", tuple
306 A large part of the revlog logic deals with revisions' "index entries", tuple
307 objects that contains the same "items" whatever the revlog version.
307 objects that contains the same "items" whatever the revlog version.
308 Different versions will have different ways of storing these items (sometimes
308 Different versions will have different ways of storing these items (sometimes
309 not having them at all), but the tuple will always be the same. New fields
309 not having them at all), but the tuple will always be the same. New fields
310 are usually added at the end to avoid breaking existing code that relies
310 are usually added at the end to avoid breaking existing code that relies
311 on the existing order. The field are defined as follows:
311 on the existing order. The field are defined as follows:
312
312
313 [0] offset:
313 [0] offset:
314 The byte index of the start of revision data chunk.
314 The byte index of the start of revision data chunk.
315 That value is shifted up by 16 bits. use "offset = field >> 16" to
315 That value is shifted up by 16 bits. use "offset = field >> 16" to
316 retrieve it.
316 retrieve it.
317
317
318 flags:
318 flags:
319 A flag field that carries special information or changes the behavior
319 A flag field that carries special information or changes the behavior
320 of the revision. (see `REVIDX_*` constants for details)
320 of the revision. (see `REVIDX_*` constants for details)
321 The flag field only occupies the first 16 bits of this field,
321 The flag field only occupies the first 16 bits of this field,
322 use "flags = field & 0xFFFF" to retrieve the value.
322 use "flags = field & 0xFFFF" to retrieve the value.
323
323
324 [1] compressed length:
324 [1] compressed length:
325 The size, in bytes, of the chunk on disk
325 The size, in bytes, of the chunk on disk
326
326
327 [2] uncompressed length:
327 [2] uncompressed length:
328 The size, in bytes, of the full revision once reconstructed.
328 The size, in bytes, of the full revision once reconstructed.
329
329
330 [3] base rev:
330 [3] base rev:
331 Either the base of the revision delta chain (without general
331 Either the base of the revision delta chain (without general
332 delta), or the base of the delta (stored in the data chunk)
332 delta), or the base of the delta (stored in the data chunk)
333 with general delta.
333 with general delta.
334
334
335 [4] link rev:
335 [4] link rev:
336 Changelog revision number of the changeset introducing this
336 Changelog revision number of the changeset introducing this
337 revision.
337 revision.
338
338
339 [5] parent 1 rev:
339 [5] parent 1 rev:
340 Revision number of the first parent
340 Revision number of the first parent
341
341
342 [6] parent 2 rev:
342 [6] parent 2 rev:
343 Revision number of the second parent
343 Revision number of the second parent
344
344
345 [7] node id:
345 [7] node id:
346 The node id of the current revision
346 The node id of the current revision
347
347
348 [8] sidedata offset:
348 [8] sidedata offset:
349 The byte index of the start of the revision's side-data chunk.
349 The byte index of the start of the revision's side-data chunk.
350
350
351 [9] sidedata chunk length:
351 [9] sidedata chunk length:
352 The size, in bytes, of the revision's side-data chunk.
352 The size, in bytes, of the revision's side-data chunk.
353
353
354 [10] data compression mode:
354 [10] data compression mode:
355 two bits that detail the way the data chunk is compressed on disk.
355 two bits that detail the way the data chunk is compressed on disk.
356 (see "COMP_MODE_*" constants for details). For revlog version 0 and
356 (see "COMP_MODE_*" constants for details). For revlog version 0 and
357 1 this will always be COMP_MODE_INLINE.
357 1 this will always be COMP_MODE_INLINE.
358
358
359 [11] side-data compression mode:
359 [11] side-data compression mode:
360 two bits that detail the way the sidedata chunk is compressed on disk.
360 two bits that detail the way the sidedata chunk is compressed on disk.
361 (see "COMP_MODE_*" constants for details)
361 (see "COMP_MODE_*" constants for details)
362 """
362 """
363
363
364 _flagserrorclass = error.RevlogError
364 _flagserrorclass = error.RevlogError
365
365
366 def __init__(
366 def __init__(
367 self,
367 self,
368 opener,
368 opener,
369 target,
369 target,
370 radix,
370 radix,
371 postfix=None, # only exist for `tmpcensored` now
371 postfix=None, # only exist for `tmpcensored` now
372 checkambig=False,
372 checkambig=False,
373 mmaplargeindex=False,
373 mmaplargeindex=False,
374 censorable=False,
374 censorable=False,
375 upperboundcomp=None,
375 upperboundcomp=None,
376 persistentnodemap=False,
376 persistentnodemap=False,
377 concurrencychecker=None,
377 concurrencychecker=None,
378 trypending=False,
378 trypending=False,
379 ):
379 ):
380 """
380 """
381 create a revlog object
381 create a revlog object
382
382
383 opener is a function that abstracts the file opening operation
383 opener is a function that abstracts the file opening operation
384 and can be used to implement COW semantics or the like.
384 and can be used to implement COW semantics or the like.
385
385
386 `target`: a (KIND, ID) tuple that identify the content stored in
386 `target`: a (KIND, ID) tuple that identify the content stored in
387 this revlog. It help the rest of the code to understand what the revlog
387 this revlog. It help the rest of the code to understand what the revlog
388 is about without having to resort to heuristic and index filename
388 is about without having to resort to heuristic and index filename
389 analysis. Note: that this must be reliably be set by normal code, but
389 analysis. Note: that this must be reliably be set by normal code, but
390 that test, debug, or performance measurement code might not set this to
390 that test, debug, or performance measurement code might not set this to
391 accurate value.
391 accurate value.
392 """
392 """
393 self.upperboundcomp = upperboundcomp
393 self.upperboundcomp = upperboundcomp
394
394
395 self.radix = radix
395 self.radix = radix
396
396
397 self._docket_file = None
397 self._docket_file = None
398 self._indexfile = None
398 self._indexfile = None
399 self._datafile = None
399 self._datafile = None
400 self._nodemap_file = None
400 self._nodemap_file = None
401 self.postfix = postfix
401 self.postfix = postfix
402 self._trypending = trypending
402 self._trypending = trypending
403 self.opener = opener
403 self.opener = opener
404 if persistentnodemap:
404 if persistentnodemap:
405 self._nodemap_file = nodemaputil.get_nodemap_file(self)
405 self._nodemap_file = nodemaputil.get_nodemap_file(self)
406
406
407 assert target[0] in ALL_KINDS
407 assert target[0] in ALL_KINDS
408 assert len(target) == 2
408 assert len(target) == 2
409 self.target = target
409 self.target = target
410 # When True, indexfile is opened with checkambig=True at writing, to
410 # When True, indexfile is opened with checkambig=True at writing, to
411 # avoid file stat ambiguity.
411 # avoid file stat ambiguity.
412 self._checkambig = checkambig
412 self._checkambig = checkambig
413 self._mmaplargeindex = mmaplargeindex
413 self._mmaplargeindex = mmaplargeindex
414 self._censorable = censorable
414 self._censorable = censorable
415 # 3-tuple of (node, rev, text) for a raw revision.
415 # 3-tuple of (node, rev, text) for a raw revision.
416 self._revisioncache = None
416 self._revisioncache = None
417 # Maps rev to chain base rev.
417 # Maps rev to chain base rev.
418 self._chainbasecache = util.lrucachedict(100)
418 self._chainbasecache = util.lrucachedict(100)
419 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
419 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
420 self._chunkcache = (0, b'')
420 self._chunkcache = (0, b'')
421 # How much data to read and cache into the raw revlog data cache.
421 # How much data to read and cache into the raw revlog data cache.
422 self._chunkcachesize = 65536
422 self._chunkcachesize = 65536
423 self._maxchainlen = None
423 self._maxchainlen = None
424 self._deltabothparents = True
424 self._deltabothparents = True
425 self.index = None
425 self.index = None
426 self._docket = None
426 self._docket = None
427 self._nodemap_docket = None
427 self._nodemap_docket = None
428 # Mapping of partial identifiers to full nodes.
428 # Mapping of partial identifiers to full nodes.
429 self._pcache = {}
429 self._pcache = {}
430 # Mapping of revision integer to full node.
430 # Mapping of revision integer to full node.
431 self._compengine = b'zlib'
431 self._compengine = b'zlib'
432 self._compengineopts = {}
432 self._compengineopts = {}
433 self._maxdeltachainspan = -1
433 self._maxdeltachainspan = -1
434 self._withsparseread = False
434 self._withsparseread = False
435 self._sparserevlog = False
435 self._sparserevlog = False
436 self.hassidedata = False
436 self.hassidedata = False
437 self._srdensitythreshold = 0.50
437 self._srdensitythreshold = 0.50
438 self._srmingapsize = 262144
438 self._srmingapsize = 262144
439
439
440 # Make copy of flag processors so each revlog instance can support
440 # Make copy of flag processors so each revlog instance can support
441 # custom flags.
441 # custom flags.
442 self._flagprocessors = dict(flagutil.flagprocessors)
442 self._flagprocessors = dict(flagutil.flagprocessors)
443
443
444 # 2-tuple of file handles being used for active writing.
444 # 2-tuple of file handles being used for active writing.
445 self._writinghandles = None
445 self._writinghandles = None
446 # prevent nesting of addgroup
446 # prevent nesting of addgroup
447 self._adding_group = None
447 self._adding_group = None
448
448
449 self._loadindex()
449 self._loadindex()
450
450
451 self._concurrencychecker = concurrencychecker
451 self._concurrencychecker = concurrencychecker
452
452
453 def _init_opts(self):
453 def _init_opts(self):
454 """process options (from above/config) to setup associated default revlog mode
454 """process options (from above/config) to setup associated default revlog mode
455
455
456 These values might be affected when actually reading on disk information.
456 These values might be affected when actually reading on disk information.
457
457
458 The relevant values are returned for use in _loadindex().
458 The relevant values are returned for use in _loadindex().
459
459
460 * newversionflags:
460 * newversionflags:
461 version header to use if we need to create a new revlog
461 version header to use if we need to create a new revlog
462
462
463 * mmapindexthreshold:
463 * mmapindexthreshold:
464 minimal index size for start to use mmap
464 minimal index size for start to use mmap
465
465
466 * force_nodemap:
466 * force_nodemap:
467 force the usage of a "development" version of the nodemap code
467 force the usage of a "development" version of the nodemap code
468 """
468 """
469 mmapindexthreshold = None
469 mmapindexthreshold = None
470 opts = self.opener.options
470 opts = self.opener.options
471
471
472 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
472 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
473 new_header = CHANGELOGV2
473 new_header = CHANGELOGV2
474 elif b'revlogv2' in opts:
474 elif b'revlogv2' in opts:
475 new_header = REVLOGV2
475 new_header = REVLOGV2
476 elif b'revlogv1' in opts:
476 elif b'revlogv1' in opts:
477 new_header = REVLOGV1 | FLAG_INLINE_DATA
477 new_header = REVLOGV1 | FLAG_INLINE_DATA
478 if b'generaldelta' in opts:
478 if b'generaldelta' in opts:
479 new_header |= FLAG_GENERALDELTA
479 new_header |= FLAG_GENERALDELTA
480 elif b'revlogv0' in self.opener.options:
480 elif b'revlogv0' in self.opener.options:
481 new_header = REVLOGV0
481 new_header = REVLOGV0
482 else:
482 else:
483 new_header = REVLOG_DEFAULT_VERSION
483 new_header = REVLOG_DEFAULT_VERSION
484
484
485 if b'chunkcachesize' in opts:
485 if b'chunkcachesize' in opts:
486 self._chunkcachesize = opts[b'chunkcachesize']
486 self._chunkcachesize = opts[b'chunkcachesize']
487 if b'maxchainlen' in opts:
487 if b'maxchainlen' in opts:
488 self._maxchainlen = opts[b'maxchainlen']
488 self._maxchainlen = opts[b'maxchainlen']
489 if b'deltabothparents' in opts:
489 if b'deltabothparents' in opts:
490 self._deltabothparents = opts[b'deltabothparents']
490 self._deltabothparents = opts[b'deltabothparents']
491 self._lazydelta = bool(opts.get(b'lazydelta', True))
491 self._lazydelta = bool(opts.get(b'lazydelta', True))
492 self._lazydeltabase = False
492 self._lazydeltabase = False
493 if self._lazydelta:
493 if self._lazydelta:
494 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
494 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
495 if b'compengine' in opts:
495 if b'compengine' in opts:
496 self._compengine = opts[b'compengine']
496 self._compengine = opts[b'compengine']
497 if b'zlib.level' in opts:
497 if b'zlib.level' in opts:
498 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
498 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
499 if b'zstd.level' in opts:
499 if b'zstd.level' in opts:
500 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
500 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
501 if b'maxdeltachainspan' in opts:
501 if b'maxdeltachainspan' in opts:
502 self._maxdeltachainspan = opts[b'maxdeltachainspan']
502 self._maxdeltachainspan = opts[b'maxdeltachainspan']
503 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
503 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
504 mmapindexthreshold = opts[b'mmapindexthreshold']
504 mmapindexthreshold = opts[b'mmapindexthreshold']
505 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
505 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
506 withsparseread = bool(opts.get(b'with-sparse-read', False))
506 withsparseread = bool(opts.get(b'with-sparse-read', False))
507 # sparse-revlog forces sparse-read
507 # sparse-revlog forces sparse-read
508 self._withsparseread = self._sparserevlog or withsparseread
508 self._withsparseread = self._sparserevlog or withsparseread
509 if b'sparse-read-density-threshold' in opts:
509 if b'sparse-read-density-threshold' in opts:
510 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
510 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
511 if b'sparse-read-min-gap-size' in opts:
511 if b'sparse-read-min-gap-size' in opts:
512 self._srmingapsize = opts[b'sparse-read-min-gap-size']
512 self._srmingapsize = opts[b'sparse-read-min-gap-size']
513 if opts.get(b'enableellipsis'):
513 if opts.get(b'enableellipsis'):
514 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
514 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
515
515
516 # revlog v0 doesn't have flag processors
516 # revlog v0 doesn't have flag processors
517 for flag, processor in pycompat.iteritems(
517 for flag, processor in pycompat.iteritems(
518 opts.get(b'flagprocessors', {})
518 opts.get(b'flagprocessors', {})
519 ):
519 ):
520 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
520 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
521
521
522 if self._chunkcachesize <= 0:
522 if self._chunkcachesize <= 0:
523 raise error.RevlogError(
523 raise error.RevlogError(
524 _(b'revlog chunk cache size %r is not greater than 0')
524 _(b'revlog chunk cache size %r is not greater than 0')
525 % self._chunkcachesize
525 % self._chunkcachesize
526 )
526 )
527 elif self._chunkcachesize & (self._chunkcachesize - 1):
527 elif self._chunkcachesize & (self._chunkcachesize - 1):
528 raise error.RevlogError(
528 raise error.RevlogError(
529 _(b'revlog chunk cache size %r is not a power of 2')
529 _(b'revlog chunk cache size %r is not a power of 2')
530 % self._chunkcachesize
530 % self._chunkcachesize
531 )
531 )
532 force_nodemap = opts.get(b'devel-force-nodemap', False)
532 force_nodemap = opts.get(b'devel-force-nodemap', False)
533 return new_header, mmapindexthreshold, force_nodemap
533 return new_header, mmapindexthreshold, force_nodemap
534
534
535 def _get_data(self, filepath, mmap_threshold, size=None):
535 def _get_data(self, filepath, mmap_threshold, size=None):
536 """return a file content with or without mmap
536 """return a file content with or without mmap
537
537
538 If the file is missing return the empty string"""
538 If the file is missing return the empty string"""
539 try:
539 try:
540 with self.opener(filepath) as fp:
540 with self.opener(filepath) as fp:
541 if mmap_threshold is not None:
541 if mmap_threshold is not None:
542 file_size = self.opener.fstat(fp).st_size
542 file_size = self.opener.fstat(fp).st_size
543 if file_size >= mmap_threshold:
543 if file_size >= mmap_threshold:
544 if size is not None:
544 if size is not None:
545 # avoid potentiel mmap crash
545 # avoid potentiel mmap crash
546 size = min(file_size, size)
546 size = min(file_size, size)
547 # TODO: should .close() to release resources without
547 # TODO: should .close() to release resources without
548 # relying on Python GC
548 # relying on Python GC
549 if size is None:
549 if size is None:
550 return util.buffer(util.mmapread(fp))
550 return util.buffer(util.mmapread(fp))
551 else:
551 else:
552 return util.buffer(util.mmapread(fp, size))
552 return util.buffer(util.mmapread(fp, size))
553 if size is None:
553 if size is None:
554 return fp.read()
554 return fp.read()
555 else:
555 else:
556 return fp.read(size)
556 return fp.read(size)
557 except IOError as inst:
557 except IOError as inst:
558 if inst.errno != errno.ENOENT:
558 if inst.errno != errno.ENOENT:
559 raise
559 raise
560 return b''
560 return b''
561
561
562 def _loadindex(self):
562 def _loadindex(self):
563
563
564 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
564 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
565
565
566 if self.postfix is not None:
566 if self.postfix is not None:
567 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
567 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
568 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
568 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
569 entry_point = b'%s.i.a' % self.radix
569 entry_point = b'%s.i.a' % self.radix
570 else:
570 else:
571 entry_point = b'%s.i' % self.radix
571 entry_point = b'%s.i' % self.radix
572
572
573 entry_data = b''
573 entry_data = b''
574 self._initempty = True
574 self._initempty = True
575 entry_data = self._get_data(entry_point, mmapindexthreshold)
575 entry_data = self._get_data(entry_point, mmapindexthreshold)
576 if len(entry_data) > 0:
576 if len(entry_data) > 0:
577 header = INDEX_HEADER.unpack(entry_data[:4])[0]
577 header = INDEX_HEADER.unpack(entry_data[:4])[0]
578 self._initempty = False
578 self._initempty = False
579 else:
579 else:
580 header = new_header
580 header = new_header
581
581
582 self._format_flags = header & ~0xFFFF
582 self._format_flags = header & ~0xFFFF
583 self._format_version = header & 0xFFFF
583 self._format_version = header & 0xFFFF
584
584
585 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
585 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
586 if supported_flags is None:
586 if supported_flags is None:
587 msg = _(b'unknown version (%d) in revlog %s')
587 msg = _(b'unknown version (%d) in revlog %s')
588 msg %= (self._format_version, self.display_id)
588 msg %= (self._format_version, self.display_id)
589 raise error.RevlogError(msg)
589 raise error.RevlogError(msg)
590 elif self._format_flags & ~supported_flags:
590 elif self._format_flags & ~supported_flags:
591 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
591 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
592 display_flag = self._format_flags >> 16
592 display_flag = self._format_flags >> 16
593 msg %= (display_flag, self._format_version, self.display_id)
593 msg %= (display_flag, self._format_version, self.display_id)
594 raise error.RevlogError(msg)
594 raise error.RevlogError(msg)
595
595
596 features = FEATURES_BY_VERSION[self._format_version]
596 features = FEATURES_BY_VERSION[self._format_version]
597 self._inline = features[b'inline'](self._format_flags)
597 self._inline = features[b'inline'](self._format_flags)
598 self._generaldelta = features[b'generaldelta'](self._format_flags)
598 self._generaldelta = features[b'generaldelta'](self._format_flags)
599 self.hassidedata = features[b'sidedata']
599 self.hassidedata = features[b'sidedata']
600
600
601 if not features[b'docket']:
601 if not features[b'docket']:
602 self._indexfile = entry_point
602 self._indexfile = entry_point
603 index_data = entry_data
603 index_data = entry_data
604 else:
604 else:
605 self._docket_file = entry_point
605 self._docket_file = entry_point
606 if self._initempty:
606 if self._initempty:
607 self._docket = docketutil.default_docket(self, header)
607 self._docket = docketutil.default_docket(self, header)
608 else:
608 else:
609 self._docket = docketutil.parse_docket(
609 self._docket = docketutil.parse_docket(
610 self, entry_data, use_pending=self._trypending
610 self, entry_data, use_pending=self._trypending
611 )
611 )
612 self._indexfile = self._docket.index_filepath()
612 self._indexfile = self._docket.index_filepath()
613 index_data = b''
613 index_data = b''
614 index_size = self._docket.index_end
614 index_size = self._docket.index_end
615 if index_size > 0:
615 if index_size > 0:
616 index_data = self._get_data(
616 index_data = self._get_data(
617 self._indexfile, mmapindexthreshold, size=index_size
617 self._indexfile, mmapindexthreshold, size=index_size
618 )
618 )
619 if len(index_data) < index_size:
619 if len(index_data) < index_size:
620 msg = _(b'too few index data for %s: got %d, expected %d')
620 msg = _(b'too few index data for %s: got %d, expected %d')
621 msg %= (self.display_id, len(index_data), index_size)
621 msg %= (self.display_id, len(index_data), index_size)
622 raise error.RevlogError(msg)
622 raise error.RevlogError(msg)
623
623
624 self._inline = False
624 self._inline = False
625 # generaldelta implied by version 2 revlogs.
625 # generaldelta implied by version 2 revlogs.
626 self._generaldelta = True
626 self._generaldelta = True
627 # the logic for persistent nodemap will be dealt with within the
627 # the logic for persistent nodemap will be dealt with within the
628 # main docket, so disable it for now.
628 # main docket, so disable it for now.
629 self._nodemap_file = None
629 self._nodemap_file = None
630
630
631 if self._docket is not None:
631 if self._docket is not None:
632 self._datafile = self._docket.data_filepath()
632 self._datafile = self._docket.data_filepath()
633 elif self.postfix is None:
633 elif self.postfix is None:
634 self._datafile = b'%s.d' % self.radix
634 self._datafile = b'%s.d' % self.radix
635 else:
635 else:
636 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
636 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
637
637
638 self.nodeconstants = sha1nodeconstants
638 self.nodeconstants = sha1nodeconstants
639 self.nullid = self.nodeconstants.nullid
639 self.nullid = self.nodeconstants.nullid
640
640
641 # sparse-revlog can't be on without general-delta (issue6056)
641 # sparse-revlog can't be on without general-delta (issue6056)
642 if not self._generaldelta:
642 if not self._generaldelta:
643 self._sparserevlog = False
643 self._sparserevlog = False
644
644
645 self._storedeltachains = True
645 self._storedeltachains = True
646
646
647 devel_nodemap = (
647 devel_nodemap = (
648 self._nodemap_file
648 self._nodemap_file
649 and force_nodemap
649 and force_nodemap
650 and parse_index_v1_nodemap is not None
650 and parse_index_v1_nodemap is not None
651 )
651 )
652
652
653 use_rust_index = False
653 use_rust_index = False
654 if rustrevlog is not None:
654 if rustrevlog is not None:
655 if self._nodemap_file is not None:
655 if self._nodemap_file is not None:
656 use_rust_index = True
656 use_rust_index = True
657 else:
657 else:
658 use_rust_index = self.opener.options.get(b'rust.index')
658 use_rust_index = self.opener.options.get(b'rust.index')
659
659
660 self._parse_index = parse_index_v1
660 self._parse_index = parse_index_v1
661 if self._format_version == REVLOGV0:
661 if self._format_version == REVLOGV0:
662 self._parse_index = revlogv0.parse_index_v0
662 self._parse_index = revlogv0.parse_index_v0
663 elif self._format_version == REVLOGV2:
663 elif self._format_version == REVLOGV2:
664 self._parse_index = parse_index_v2
664 self._parse_index = parse_index_v2
665 elif self._format_version == CHANGELOGV2:
665 elif self._format_version == CHANGELOGV2:
666 self._parse_index = parse_index_cl_v2
666 self._parse_index = parse_index_cl_v2
667 elif devel_nodemap:
667 elif devel_nodemap:
668 self._parse_index = parse_index_v1_nodemap
668 self._parse_index = parse_index_v1_nodemap
669 elif use_rust_index:
669 elif use_rust_index:
670 self._parse_index = parse_index_v1_mixed
670 self._parse_index = parse_index_v1_mixed
671 try:
671 try:
672 d = self._parse_index(index_data, self._inline)
672 d = self._parse_index(index_data, self._inline)
673 index, _chunkcache = d
673 index, _chunkcache = d
674 use_nodemap = (
674 use_nodemap = (
675 not self._inline
675 not self._inline
676 and self._nodemap_file is not None
676 and self._nodemap_file is not None
677 and util.safehasattr(index, 'update_nodemap_data')
677 and util.safehasattr(index, 'update_nodemap_data')
678 )
678 )
679 if use_nodemap:
679 if use_nodemap:
680 nodemap_data = nodemaputil.persisted_data(self)
680 nodemap_data = nodemaputil.persisted_data(self)
681 if nodemap_data is not None:
681 if nodemap_data is not None:
682 docket = nodemap_data[0]
682 docket = nodemap_data[0]
683 if (
683 if (
684 len(d[0]) > docket.tip_rev
684 len(d[0]) > docket.tip_rev
685 and d[0][docket.tip_rev][7] == docket.tip_node
685 and d[0][docket.tip_rev][7] == docket.tip_node
686 ):
686 ):
687 # no changelog tampering
687 # no changelog tampering
688 self._nodemap_docket = docket
688 self._nodemap_docket = docket
689 index.update_nodemap_data(*nodemap_data)
689 index.update_nodemap_data(*nodemap_data)
690 except (ValueError, IndexError):
690 except (ValueError, IndexError):
691 raise error.RevlogError(
691 raise error.RevlogError(
692 _(b"index %s is corrupted") % self.display_id
692 _(b"index %s is corrupted") % self.display_id
693 )
693 )
694 self.index, self._chunkcache = d
694 self.index, self._chunkcache = d
695 if not self._chunkcache:
695 if not self._chunkcache:
696 self._chunkclear()
696 self._chunkclear()
697 # revnum -> (chain-length, sum-delta-length)
697 # revnum -> (chain-length, sum-delta-length)
698 self._chaininfocache = util.lrucachedict(500)
698 self._chaininfocache = util.lrucachedict(500)
699 # revlog header -> revlog compressor
699 # revlog header -> revlog compressor
700 self._decompressors = {}
700 self._decompressors = {}
701
701
702 @util.propertycache
702 @util.propertycache
703 def revlog_kind(self):
703 def revlog_kind(self):
704 return self.target[0]
704 return self.target[0]
705
705
706 @util.propertycache
706 @util.propertycache
707 def display_id(self):
707 def display_id(self):
708 """The public facing "ID" of the revlog that we use in message"""
708 """The public facing "ID" of the revlog that we use in message"""
709 # Maybe we should build a user facing representation of
709 # Maybe we should build a user facing representation of
710 # revlog.target instead of using `self.radix`
710 # revlog.target instead of using `self.radix`
711 return self.radix
711 return self.radix
712
712
713 def _get_decompressor(self, t):
713 def _get_decompressor(self, t):
714 try:
714 try:
715 compressor = self._decompressors[t]
715 compressor = self._decompressors[t]
716 except KeyError:
716 except KeyError:
717 try:
717 try:
718 engine = util.compengines.forrevlogheader(t)
718 engine = util.compengines.forrevlogheader(t)
719 compressor = engine.revlogcompressor(self._compengineopts)
719 compressor = engine.revlogcompressor(self._compengineopts)
720 self._decompressors[t] = compressor
720 self._decompressors[t] = compressor
721 except KeyError:
721 except KeyError:
722 raise error.RevlogError(
722 raise error.RevlogError(
723 _(b'unknown compression type %s') % binascii.hexlify(t)
723 _(b'unknown compression type %s') % binascii.hexlify(t)
724 )
724 )
725 return compressor
725 return compressor
726
726
727 @util.propertycache
727 @util.propertycache
728 def _compressor(self):
728 def _compressor(self):
729 engine = util.compengines[self._compengine]
729 engine = util.compengines[self._compengine]
730 return engine.revlogcompressor(self._compengineopts)
730 return engine.revlogcompressor(self._compengineopts)
731
731
732 @util.propertycache
732 @util.propertycache
733 def _decompressor(self):
733 def _decompressor(self):
734 """the default decompressor"""
734 """the default decompressor"""
735 if self._docket is None:
735 if self._docket is None:
736 return None
736 return None
737 t = self._docket.default_compression_header
737 t = self._docket.default_compression_header
738 c = self._get_decompressor(t)
738 c = self._get_decompressor(t)
739 return c.decompress
739 return c.decompress
740
740
741 def _indexfp(self):
741 def _indexfp(self):
742 """file object for the revlog's index file"""
742 """file object for the revlog's index file"""
743 return self.opener(self._indexfile, mode=b"r")
743 return self.opener(self._indexfile, mode=b"r")
744
744
745 def __index_write_fp(self):
745 def __index_write_fp(self):
746 # You should not use this directly and use `_writing` instead
746 # You should not use this directly and use `_writing` instead
747 try:
747 try:
748 f = self.opener(
748 f = self.opener(
749 self._indexfile, mode=b"r+", checkambig=self._checkambig
749 self._indexfile, mode=b"r+", checkambig=self._checkambig
750 )
750 )
751 if self._docket is None:
751 if self._docket is None:
752 f.seek(0, os.SEEK_END)
752 f.seek(0, os.SEEK_END)
753 else:
753 else:
754 f.seek(self._docket.index_end, os.SEEK_SET)
754 f.seek(self._docket.index_end, os.SEEK_SET)
755 return f
755 return f
756 except IOError as inst:
756 except IOError as inst:
757 if inst.errno != errno.ENOENT:
757 if inst.errno != errno.ENOENT:
758 raise
758 raise
759 return self.opener(
759 return self.opener(
760 self._indexfile, mode=b"w+", checkambig=self._checkambig
760 self._indexfile, mode=b"w+", checkambig=self._checkambig
761 )
761 )
762
762
763 def __index_new_fp(self):
763 def __index_new_fp(self):
764 # You should not use this unless you are upgrading from inline revlog
764 # You should not use this unless you are upgrading from inline revlog
765 return self.opener(
765 return self.opener(
766 self._indexfile,
766 self._indexfile,
767 mode=b"w",
767 mode=b"w",
768 checkambig=self._checkambig,
768 checkambig=self._checkambig,
769 atomictemp=True,
769 atomictemp=True,
770 )
770 )
771
771
772 def _datafp(self, mode=b'r'):
772 def _datafp(self, mode=b'r'):
773 """file object for the revlog's data file"""
773 """file object for the revlog's data file"""
774 return self.opener(self._datafile, mode=mode)
774 return self.opener(self._datafile, mode=mode)
775
775
776 @contextlib.contextmanager
776 @contextlib.contextmanager
777 def _datareadfp(self, existingfp=None):
777 def _datareadfp(self, existingfp=None):
778 """file object suitable to read data"""
778 """file object suitable to read data"""
779 # Use explicit file handle, if given.
779 # Use explicit file handle, if given.
780 if existingfp is not None:
780 if existingfp is not None:
781 yield existingfp
781 yield existingfp
782
782
783 # Use a file handle being actively used for writes, if available.
783 # Use a file handle being actively used for writes, if available.
784 # There is some danger to doing this because reads will seek the
784 # There is some danger to doing this because reads will seek the
785 # file. However, _writeentry() performs a SEEK_END before all writes,
785 # file. However, _writeentry() performs a SEEK_END before all writes,
786 # so we should be safe.
786 # so we should be safe.
787 elif self._writinghandles:
787 elif self._writinghandles:
788 if self._inline:
788 if self._inline:
789 yield self._writinghandles[0]
789 yield self._writinghandles[0]
790 else:
790 else:
791 yield self._writinghandles[1]
791 yield self._writinghandles[1]
792
792
793 # Otherwise open a new file handle.
793 # Otherwise open a new file handle.
794 else:
794 else:
795 if self._inline:
795 if self._inline:
796 func = self._indexfp
796 func = self._indexfp
797 else:
797 else:
798 func = self._datafp
798 func = self._datafp
799 with func() as fp:
799 with func() as fp:
800 yield fp
800 yield fp
801
801
802 def tiprev(self):
802 def tiprev(self):
803 return len(self.index) - 1
803 return len(self.index) - 1
804
804
805 def tip(self):
805 def tip(self):
806 return self.node(self.tiprev())
806 return self.node(self.tiprev())
807
807
808 def __contains__(self, rev):
808 def __contains__(self, rev):
809 return 0 <= rev < len(self)
809 return 0 <= rev < len(self)
810
810
811 def __len__(self):
811 def __len__(self):
812 return len(self.index)
812 return len(self.index)
813
813
814 def __iter__(self):
814 def __iter__(self):
815 return iter(pycompat.xrange(len(self)))
815 return iter(pycompat.xrange(len(self)))
816
816
817 def revs(self, start=0, stop=None):
817 def revs(self, start=0, stop=None):
818 """iterate over all rev in this revlog (from start to stop)"""
818 """iterate over all rev in this revlog (from start to stop)"""
819 return storageutil.iterrevs(len(self), start=start, stop=stop)
819 return storageutil.iterrevs(len(self), start=start, stop=stop)
820
820
821 @property
821 @property
822 def nodemap(self):
822 def nodemap(self):
823 msg = (
823 msg = (
824 b"revlog.nodemap is deprecated, "
824 b"revlog.nodemap is deprecated, "
825 b"use revlog.index.[has_node|rev|get_rev]"
825 b"use revlog.index.[has_node|rev|get_rev]"
826 )
826 )
827 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
827 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
828 return self.index.nodemap
828 return self.index.nodemap
829
829
830 @property
830 @property
831 def _nodecache(self):
831 def _nodecache(self):
832 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
832 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
833 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
833 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
834 return self.index.nodemap
834 return self.index.nodemap
835
835
836 def hasnode(self, node):
836 def hasnode(self, node):
837 try:
837 try:
838 self.rev(node)
838 self.rev(node)
839 return True
839 return True
840 except KeyError:
840 except KeyError:
841 return False
841 return False
842
842
843 def candelta(self, baserev, rev):
843 def candelta(self, baserev, rev):
844 """whether two revisions (baserev, rev) can be delta-ed or not"""
844 """whether two revisions (baserev, rev) can be delta-ed or not"""
845 # Disable delta if either rev requires a content-changing flag
845 # Disable delta if either rev requires a content-changing flag
846 # processor (ex. LFS). This is because such flag processor can alter
846 # processor (ex. LFS). This is because such flag processor can alter
847 # the rawtext content that the delta will be based on, and two clients
847 # the rawtext content that the delta will be based on, and two clients
848 # could have a same revlog node with different flags (i.e. different
848 # could have a same revlog node with different flags (i.e. different
849 # rawtext contents) and the delta could be incompatible.
849 # rawtext contents) and the delta could be incompatible.
850 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
850 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
851 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
851 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
852 ):
852 ):
853 return False
853 return False
854 return True
854 return True
855
855
856 def update_caches(self, transaction):
856 def update_caches(self, transaction):
857 if self._nodemap_file is not None:
857 if self._nodemap_file is not None:
858 if transaction is None:
858 if transaction is None:
859 nodemaputil.update_persistent_nodemap(self)
859 nodemaputil.update_persistent_nodemap(self)
860 else:
860 else:
861 nodemaputil.setup_persistent_nodemap(transaction, self)
861 nodemaputil.setup_persistent_nodemap(transaction, self)
862
862
863 def clearcaches(self):
863 def clearcaches(self):
864 self._revisioncache = None
864 self._revisioncache = None
865 self._chainbasecache.clear()
865 self._chainbasecache.clear()
866 self._chunkcache = (0, b'')
866 self._chunkcache = (0, b'')
867 self._pcache = {}
867 self._pcache = {}
868 self._nodemap_docket = None
868 self._nodemap_docket = None
869 self.index.clearcaches()
869 self.index.clearcaches()
870 # The python code is the one responsible for validating the docket, we
870 # The python code is the one responsible for validating the docket, we
871 # end up having to refresh it here.
871 # end up having to refresh it here.
872 use_nodemap = (
872 use_nodemap = (
873 not self._inline
873 not self._inline
874 and self._nodemap_file is not None
874 and self._nodemap_file is not None
875 and util.safehasattr(self.index, 'update_nodemap_data')
875 and util.safehasattr(self.index, 'update_nodemap_data')
876 )
876 )
877 if use_nodemap:
877 if use_nodemap:
878 nodemap_data = nodemaputil.persisted_data(self)
878 nodemap_data = nodemaputil.persisted_data(self)
879 if nodemap_data is not None:
879 if nodemap_data is not None:
880 self._nodemap_docket = nodemap_data[0]
880 self._nodemap_docket = nodemap_data[0]
881 self.index.update_nodemap_data(*nodemap_data)
881 self.index.update_nodemap_data(*nodemap_data)
882
882
883 def rev(self, node):
883 def rev(self, node):
884 try:
884 try:
885 return self.index.rev(node)
885 return self.index.rev(node)
886 except TypeError:
886 except TypeError:
887 raise
887 raise
888 except error.RevlogError:
888 except error.RevlogError:
889 # parsers.c radix tree lookup failed
889 # parsers.c radix tree lookup failed
890 if (
890 if (
891 node == self.nodeconstants.wdirid
891 node == self.nodeconstants.wdirid
892 or node in self.nodeconstants.wdirfilenodeids
892 or node in self.nodeconstants.wdirfilenodeids
893 ):
893 ):
894 raise error.WdirUnsupported
894 raise error.WdirUnsupported
895 raise error.LookupError(node, self.display_id, _(b'no node'))
895 raise error.LookupError(node, self.display_id, _(b'no node'))
896
896
897 # Accessors for index entries.
897 # Accessors for index entries.
898
898
899 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
899 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
900 # are flags.
900 # are flags.
901 def start(self, rev):
901 def start(self, rev):
902 return int(self.index[rev][0] >> 16)
902 return int(self.index[rev][0] >> 16)
903
903
904 def flags(self, rev):
904 def flags(self, rev):
905 return self.index[rev][0] & 0xFFFF
905 return self.index[rev][0] & 0xFFFF
906
906
907 def length(self, rev):
907 def length(self, rev):
908 return self.index[rev][1]
908 return self.index[rev][1]
909
909
910 def sidedata_length(self, rev):
910 def sidedata_length(self, rev):
911 if not self.hassidedata:
911 if not self.hassidedata:
912 return 0
912 return 0
913 return self.index[rev][9]
913 return self.index[rev][9]
914
914
915 def rawsize(self, rev):
915 def rawsize(self, rev):
916 """return the length of the uncompressed text for a given revision"""
916 """return the length of the uncompressed text for a given revision"""
917 l = self.index[rev][2]
917 l = self.index[rev][2]
918 if l >= 0:
918 if l >= 0:
919 return l
919 return l
920
920
921 t = self.rawdata(rev)
921 t = self.rawdata(rev)
922 return len(t)
922 return len(t)
923
923
924 def size(self, rev):
924 def size(self, rev):
925 """length of non-raw text (processed by a "read" flag processor)"""
925 """length of non-raw text (processed by a "read" flag processor)"""
926 # fast path: if no "read" flag processor could change the content,
926 # fast path: if no "read" flag processor could change the content,
927 # size is rawsize. note: ELLIPSIS is known to not change the content.
927 # size is rawsize. note: ELLIPSIS is known to not change the content.
928 flags = self.flags(rev)
928 flags = self.flags(rev)
929 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
929 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
930 return self.rawsize(rev)
930 return self.rawsize(rev)
931
931
932 return len(self.revision(rev, raw=False))
932 return len(self.revision(rev, raw=False))
933
933
934 def chainbase(self, rev):
934 def chainbase(self, rev):
935 base = self._chainbasecache.get(rev)
935 base = self._chainbasecache.get(rev)
936 if base is not None:
936 if base is not None:
937 return base
937 return base
938
938
939 index = self.index
939 index = self.index
940 iterrev = rev
940 iterrev = rev
941 base = index[iterrev][3]
941 base = index[iterrev][3]
942 while base != iterrev:
942 while base != iterrev:
943 iterrev = base
943 iterrev = base
944 base = index[iterrev][3]
944 base = index[iterrev][3]
945
945
946 self._chainbasecache[rev] = base
946 self._chainbasecache[rev] = base
947 return base
947 return base
948
948
949 def linkrev(self, rev):
949 def linkrev(self, rev):
950 return self.index[rev][4]
950 return self.index[rev][4]
951
951
952 def parentrevs(self, rev):
952 def parentrevs(self, rev):
953 try:
953 try:
954 entry = self.index[rev]
954 entry = self.index[rev]
955 except IndexError:
955 except IndexError:
956 if rev == wdirrev:
956 if rev == wdirrev:
957 raise error.WdirUnsupported
957 raise error.WdirUnsupported
958 raise
958 raise
959 if entry[5] == nullrev:
959 if entry[5] == nullrev:
960 return entry[6], entry[5]
960 return entry[6], entry[5]
961 else:
961 else:
962 return entry[5], entry[6]
962 return entry[5], entry[6]
963
963
964 # fast parentrevs(rev) where rev isn't filtered
964 # fast parentrevs(rev) where rev isn't filtered
965 _uncheckedparentrevs = parentrevs
965 _uncheckedparentrevs = parentrevs
966
966
967 def node(self, rev):
967 def node(self, rev):
968 try:
968 try:
969 return self.index[rev][7]
969 return self.index[rev][7]
970 except IndexError:
970 except IndexError:
971 if rev == wdirrev:
971 if rev == wdirrev:
972 raise error.WdirUnsupported
972 raise error.WdirUnsupported
973 raise
973 raise
974
974
975 # Derived from index values.
975 # Derived from index values.
976
976
977 def end(self, rev):
977 def end(self, rev):
978 return self.start(rev) + self.length(rev)
978 return self.start(rev) + self.length(rev)
979
979
980 def parents(self, node):
980 def parents(self, node):
981 i = self.index
981 i = self.index
982 d = i[self.rev(node)]
982 d = i[self.rev(node)]
983 # inline node() to avoid function call overhead
983 # inline node() to avoid function call overhead
984 if d[5] == self.nullid:
984 if d[5] == self.nullid:
985 return i[d[6]][7], i[d[5]][7]
985 return i[d[6]][7], i[d[5]][7]
986 else:
986 else:
987 return i[d[5]][7], i[d[6]][7]
987 return i[d[5]][7], i[d[6]][7]
988
988
989 def chainlen(self, rev):
989 def chainlen(self, rev):
990 return self._chaininfo(rev)[0]
990 return self._chaininfo(rev)[0]
991
991
992 def _chaininfo(self, rev):
992 def _chaininfo(self, rev):
993 chaininfocache = self._chaininfocache
993 chaininfocache = self._chaininfocache
994 if rev in chaininfocache:
994 if rev in chaininfocache:
995 return chaininfocache[rev]
995 return chaininfocache[rev]
996 index = self.index
996 index = self.index
997 generaldelta = self._generaldelta
997 generaldelta = self._generaldelta
998 iterrev = rev
998 iterrev = rev
999 e = index[iterrev]
999 e = index[iterrev]
1000 clen = 0
1000 clen = 0
1001 compresseddeltalen = 0
1001 compresseddeltalen = 0
1002 while iterrev != e[3]:
1002 while iterrev != e[3]:
1003 clen += 1
1003 clen += 1
1004 compresseddeltalen += e[1]
1004 compresseddeltalen += e[1]
1005 if generaldelta:
1005 if generaldelta:
1006 iterrev = e[3]
1006 iterrev = e[3]
1007 else:
1007 else:
1008 iterrev -= 1
1008 iterrev -= 1
1009 if iterrev in chaininfocache:
1009 if iterrev in chaininfocache:
1010 t = chaininfocache[iterrev]
1010 t = chaininfocache[iterrev]
1011 clen += t[0]
1011 clen += t[0]
1012 compresseddeltalen += t[1]
1012 compresseddeltalen += t[1]
1013 break
1013 break
1014 e = index[iterrev]
1014 e = index[iterrev]
1015 else:
1015 else:
1016 # Add text length of base since decompressing that also takes
1016 # Add text length of base since decompressing that also takes
1017 # work. For cache hits the length is already included.
1017 # work. For cache hits the length is already included.
1018 compresseddeltalen += e[1]
1018 compresseddeltalen += e[1]
1019 r = (clen, compresseddeltalen)
1019 r = (clen, compresseddeltalen)
1020 chaininfocache[rev] = r
1020 chaininfocache[rev] = r
1021 return r
1021 return r
1022
1022
1023 def _deltachain(self, rev, stoprev=None):
1023 def _deltachain(self, rev, stoprev=None):
1024 """Obtain the delta chain for a revision.
1024 """Obtain the delta chain for a revision.
1025
1025
1026 ``stoprev`` specifies a revision to stop at. If not specified, we
1026 ``stoprev`` specifies a revision to stop at. If not specified, we
1027 stop at the base of the chain.
1027 stop at the base of the chain.
1028
1028
1029 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1029 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1030 revs in ascending order and ``stopped`` is a bool indicating whether
1030 revs in ascending order and ``stopped`` is a bool indicating whether
1031 ``stoprev`` was hit.
1031 ``stoprev`` was hit.
1032 """
1032 """
1033 # Try C implementation.
1033 # Try C implementation.
1034 try:
1034 try:
1035 return self.index.deltachain(rev, stoprev, self._generaldelta)
1035 return self.index.deltachain(rev, stoprev, self._generaldelta)
1036 except AttributeError:
1036 except AttributeError:
1037 pass
1037 pass
1038
1038
1039 chain = []
1039 chain = []
1040
1040
1041 # Alias to prevent attribute lookup in tight loop.
1041 # Alias to prevent attribute lookup in tight loop.
1042 index = self.index
1042 index = self.index
1043 generaldelta = self._generaldelta
1043 generaldelta = self._generaldelta
1044
1044
1045 iterrev = rev
1045 iterrev = rev
1046 e = index[iterrev]
1046 e = index[iterrev]
1047 while iterrev != e[3] and iterrev != stoprev:
1047 while iterrev != e[3] and iterrev != stoprev:
1048 chain.append(iterrev)
1048 chain.append(iterrev)
1049 if generaldelta:
1049 if generaldelta:
1050 iterrev = e[3]
1050 iterrev = e[3]
1051 else:
1051 else:
1052 iterrev -= 1
1052 iterrev -= 1
1053 e = index[iterrev]
1053 e = index[iterrev]
1054
1054
1055 if iterrev == stoprev:
1055 if iterrev == stoprev:
1056 stopped = True
1056 stopped = True
1057 else:
1057 else:
1058 chain.append(iterrev)
1058 chain.append(iterrev)
1059 stopped = False
1059 stopped = False
1060
1060
1061 chain.reverse()
1061 chain.reverse()
1062 return chain, stopped
1062 return chain, stopped
1063
1063
1064 def ancestors(self, revs, stoprev=0, inclusive=False):
1064 def ancestors(self, revs, stoprev=0, inclusive=False):
1065 """Generate the ancestors of 'revs' in reverse revision order.
1065 """Generate the ancestors of 'revs' in reverse revision order.
1066 Does not generate revs lower than stoprev.
1066 Does not generate revs lower than stoprev.
1067
1067
1068 See the documentation for ancestor.lazyancestors for more details."""
1068 See the documentation for ancestor.lazyancestors for more details."""
1069
1069
1070 # first, make sure start revisions aren't filtered
1070 # first, make sure start revisions aren't filtered
1071 revs = list(revs)
1071 revs = list(revs)
1072 checkrev = self.node
1072 checkrev = self.node
1073 for r in revs:
1073 for r in revs:
1074 checkrev(r)
1074 checkrev(r)
1075 # and we're sure ancestors aren't filtered as well
1075 # and we're sure ancestors aren't filtered as well
1076
1076
1077 if rustancestor is not None and self.index.rust_ext_compat:
1077 if rustancestor is not None and self.index.rust_ext_compat:
1078 lazyancestors = rustancestor.LazyAncestors
1078 lazyancestors = rustancestor.LazyAncestors
1079 arg = self.index
1079 arg = self.index
1080 else:
1080 else:
1081 lazyancestors = ancestor.lazyancestors
1081 lazyancestors = ancestor.lazyancestors
1082 arg = self._uncheckedparentrevs
1082 arg = self._uncheckedparentrevs
1083 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1083 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1084
1084
1085 def descendants(self, revs):
1085 def descendants(self, revs):
1086 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1086 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1087
1087
1088 def findcommonmissing(self, common=None, heads=None):
1088 def findcommonmissing(self, common=None, heads=None):
1089 """Return a tuple of the ancestors of common and the ancestors of heads
1089 """Return a tuple of the ancestors of common and the ancestors of heads
1090 that are not ancestors of common. In revset terminology, we return the
1090 that are not ancestors of common. In revset terminology, we return the
1091 tuple:
1091 tuple:
1092
1092
1093 ::common, (::heads) - (::common)
1093 ::common, (::heads) - (::common)
1094
1094
1095 The list is sorted by revision number, meaning it is
1095 The list is sorted by revision number, meaning it is
1096 topologically sorted.
1096 topologically sorted.
1097
1097
1098 'heads' and 'common' are both lists of node IDs. If heads is
1098 'heads' and 'common' are both lists of node IDs. If heads is
1099 not supplied, uses all of the revlog's heads. If common is not
1099 not supplied, uses all of the revlog's heads. If common is not
1100 supplied, uses nullid."""
1100 supplied, uses nullid."""
1101 if common is None:
1101 if common is None:
1102 common = [self.nullid]
1102 common = [self.nullid]
1103 if heads is None:
1103 if heads is None:
1104 heads = self.heads()
1104 heads = self.heads()
1105
1105
1106 common = [self.rev(n) for n in common]
1106 common = [self.rev(n) for n in common]
1107 heads = [self.rev(n) for n in heads]
1107 heads = [self.rev(n) for n in heads]
1108
1108
1109 # we want the ancestors, but inclusive
1109 # we want the ancestors, but inclusive
1110 class lazyset(object):
1110 class lazyset(object):
1111 def __init__(self, lazyvalues):
1111 def __init__(self, lazyvalues):
1112 self.addedvalues = set()
1112 self.addedvalues = set()
1113 self.lazyvalues = lazyvalues
1113 self.lazyvalues = lazyvalues
1114
1114
1115 def __contains__(self, value):
1115 def __contains__(self, value):
1116 return value in self.addedvalues or value in self.lazyvalues
1116 return value in self.addedvalues or value in self.lazyvalues
1117
1117
1118 def __iter__(self):
1118 def __iter__(self):
1119 added = self.addedvalues
1119 added = self.addedvalues
1120 for r in added:
1120 for r in added:
1121 yield r
1121 yield r
1122 for r in self.lazyvalues:
1122 for r in self.lazyvalues:
1123 if not r in added:
1123 if not r in added:
1124 yield r
1124 yield r
1125
1125
1126 def add(self, value):
1126 def add(self, value):
1127 self.addedvalues.add(value)
1127 self.addedvalues.add(value)
1128
1128
1129 def update(self, values):
1129 def update(self, values):
1130 self.addedvalues.update(values)
1130 self.addedvalues.update(values)
1131
1131
1132 has = lazyset(self.ancestors(common))
1132 has = lazyset(self.ancestors(common))
1133 has.add(nullrev)
1133 has.add(nullrev)
1134 has.update(common)
1134 has.update(common)
1135
1135
1136 # take all ancestors from heads that aren't in has
1136 # take all ancestors from heads that aren't in has
1137 missing = set()
1137 missing = set()
1138 visit = collections.deque(r for r in heads if r not in has)
1138 visit = collections.deque(r for r in heads if r not in has)
1139 while visit:
1139 while visit:
1140 r = visit.popleft()
1140 r = visit.popleft()
1141 if r in missing:
1141 if r in missing:
1142 continue
1142 continue
1143 else:
1143 else:
1144 missing.add(r)
1144 missing.add(r)
1145 for p in self.parentrevs(r):
1145 for p in self.parentrevs(r):
1146 if p not in has:
1146 if p not in has:
1147 visit.append(p)
1147 visit.append(p)
1148 missing = list(missing)
1148 missing = list(missing)
1149 missing.sort()
1149 missing.sort()
1150 return has, [self.node(miss) for miss in missing]
1150 return has, [self.node(miss) for miss in missing]
1151
1151
1152 def incrementalmissingrevs(self, common=None):
1152 def incrementalmissingrevs(self, common=None):
1153 """Return an object that can be used to incrementally compute the
1153 """Return an object that can be used to incrementally compute the
1154 revision numbers of the ancestors of arbitrary sets that are not
1154 revision numbers of the ancestors of arbitrary sets that are not
1155 ancestors of common. This is an ancestor.incrementalmissingancestors
1155 ancestors of common. This is an ancestor.incrementalmissingancestors
1156 object.
1156 object.
1157
1157
1158 'common' is a list of revision numbers. If common is not supplied, uses
1158 'common' is a list of revision numbers. If common is not supplied, uses
1159 nullrev.
1159 nullrev.
1160 """
1160 """
1161 if common is None:
1161 if common is None:
1162 common = [nullrev]
1162 common = [nullrev]
1163
1163
1164 if rustancestor is not None and self.index.rust_ext_compat:
1164 if rustancestor is not None and self.index.rust_ext_compat:
1165 return rustancestor.MissingAncestors(self.index, common)
1165 return rustancestor.MissingAncestors(self.index, common)
1166 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1166 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1167
1167
1168 def findmissingrevs(self, common=None, heads=None):
1168 def findmissingrevs(self, common=None, heads=None):
1169 """Return the revision numbers of the ancestors of heads that
1169 """Return the revision numbers of the ancestors of heads that
1170 are not ancestors of common.
1170 are not ancestors of common.
1171
1171
1172 More specifically, return a list of revision numbers corresponding to
1172 More specifically, return a list of revision numbers corresponding to
1173 nodes N such that every N satisfies the following constraints:
1173 nodes N such that every N satisfies the following constraints:
1174
1174
1175 1. N is an ancestor of some node in 'heads'
1175 1. N is an ancestor of some node in 'heads'
1176 2. N is not an ancestor of any node in 'common'
1176 2. N is not an ancestor of any node in 'common'
1177
1177
1178 The list is sorted by revision number, meaning it is
1178 The list is sorted by revision number, meaning it is
1179 topologically sorted.
1179 topologically sorted.
1180
1180
1181 'heads' and 'common' are both lists of revision numbers. If heads is
1181 'heads' and 'common' are both lists of revision numbers. If heads is
1182 not supplied, uses all of the revlog's heads. If common is not
1182 not supplied, uses all of the revlog's heads. If common is not
1183 supplied, uses nullid."""
1183 supplied, uses nullid."""
1184 if common is None:
1184 if common is None:
1185 common = [nullrev]
1185 common = [nullrev]
1186 if heads is None:
1186 if heads is None:
1187 heads = self.headrevs()
1187 heads = self.headrevs()
1188
1188
1189 inc = self.incrementalmissingrevs(common=common)
1189 inc = self.incrementalmissingrevs(common=common)
1190 return inc.missingancestors(heads)
1190 return inc.missingancestors(heads)
1191
1191
1192 def findmissing(self, common=None, heads=None):
1192 def findmissing(self, common=None, heads=None):
1193 """Return the ancestors of heads that are not ancestors of common.
1193 """Return the ancestors of heads that are not ancestors of common.
1194
1194
1195 More specifically, return a list of nodes N such that every N
1195 More specifically, return a list of nodes N such that every N
1196 satisfies the following constraints:
1196 satisfies the following constraints:
1197
1197
1198 1. N is an ancestor of some node in 'heads'
1198 1. N is an ancestor of some node in 'heads'
1199 2. N is not an ancestor of any node in 'common'
1199 2. N is not an ancestor of any node in 'common'
1200
1200
1201 The list is sorted by revision number, meaning it is
1201 The list is sorted by revision number, meaning it is
1202 topologically sorted.
1202 topologically sorted.
1203
1203
1204 'heads' and 'common' are both lists of node IDs. If heads is
1204 'heads' and 'common' are both lists of node IDs. If heads is
1205 not supplied, uses all of the revlog's heads. If common is not
1205 not supplied, uses all of the revlog's heads. If common is not
1206 supplied, uses nullid."""
1206 supplied, uses nullid."""
1207 if common is None:
1207 if common is None:
1208 common = [self.nullid]
1208 common = [self.nullid]
1209 if heads is None:
1209 if heads is None:
1210 heads = self.heads()
1210 heads = self.heads()
1211
1211
1212 common = [self.rev(n) for n in common]
1212 common = [self.rev(n) for n in common]
1213 heads = [self.rev(n) for n in heads]
1213 heads = [self.rev(n) for n in heads]
1214
1214
1215 inc = self.incrementalmissingrevs(common=common)
1215 inc = self.incrementalmissingrevs(common=common)
1216 return [self.node(r) for r in inc.missingancestors(heads)]
1216 return [self.node(r) for r in inc.missingancestors(heads)]
1217
1217
1218 def nodesbetween(self, roots=None, heads=None):
1218 def nodesbetween(self, roots=None, heads=None):
1219 """Return a topological path from 'roots' to 'heads'.
1219 """Return a topological path from 'roots' to 'heads'.
1220
1220
1221 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1221 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1222 topologically sorted list of all nodes N that satisfy both of
1222 topologically sorted list of all nodes N that satisfy both of
1223 these constraints:
1223 these constraints:
1224
1224
1225 1. N is a descendant of some node in 'roots'
1225 1. N is a descendant of some node in 'roots'
1226 2. N is an ancestor of some node in 'heads'
1226 2. N is an ancestor of some node in 'heads'
1227
1227
1228 Every node is considered to be both a descendant and an ancestor
1228 Every node is considered to be both a descendant and an ancestor
1229 of itself, so every reachable node in 'roots' and 'heads' will be
1229 of itself, so every reachable node in 'roots' and 'heads' will be
1230 included in 'nodes'.
1230 included in 'nodes'.
1231
1231
1232 'outroots' is the list of reachable nodes in 'roots', i.e., the
1232 'outroots' is the list of reachable nodes in 'roots', i.e., the
1233 subset of 'roots' that is returned in 'nodes'. Likewise,
1233 subset of 'roots' that is returned in 'nodes'. Likewise,
1234 'outheads' is the subset of 'heads' that is also in 'nodes'.
1234 'outheads' is the subset of 'heads' that is also in 'nodes'.
1235
1235
1236 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1236 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1237 unspecified, uses nullid as the only root. If 'heads' is
1237 unspecified, uses nullid as the only root. If 'heads' is
1238 unspecified, uses list of all of the revlog's heads."""
1238 unspecified, uses list of all of the revlog's heads."""
1239 nonodes = ([], [], [])
1239 nonodes = ([], [], [])
1240 if roots is not None:
1240 if roots is not None:
1241 roots = list(roots)
1241 roots = list(roots)
1242 if not roots:
1242 if not roots:
1243 return nonodes
1243 return nonodes
1244 lowestrev = min([self.rev(n) for n in roots])
1244 lowestrev = min([self.rev(n) for n in roots])
1245 else:
1245 else:
1246 roots = [self.nullid] # Everybody's a descendant of nullid
1246 roots = [self.nullid] # Everybody's a descendant of nullid
1247 lowestrev = nullrev
1247 lowestrev = nullrev
1248 if (lowestrev == nullrev) and (heads is None):
1248 if (lowestrev == nullrev) and (heads is None):
1249 # We want _all_ the nodes!
1249 # We want _all_ the nodes!
1250 return (
1250 return (
1251 [self.node(r) for r in self],
1251 [self.node(r) for r in self],
1252 [self.nullid],
1252 [self.nullid],
1253 list(self.heads()),
1253 list(self.heads()),
1254 )
1254 )
1255 if heads is None:
1255 if heads is None:
1256 # All nodes are ancestors, so the latest ancestor is the last
1256 # All nodes are ancestors, so the latest ancestor is the last
1257 # node.
1257 # node.
1258 highestrev = len(self) - 1
1258 highestrev = len(self) - 1
1259 # Set ancestors to None to signal that every node is an ancestor.
1259 # Set ancestors to None to signal that every node is an ancestor.
1260 ancestors = None
1260 ancestors = None
1261 # Set heads to an empty dictionary for later discovery of heads
1261 # Set heads to an empty dictionary for later discovery of heads
1262 heads = {}
1262 heads = {}
1263 else:
1263 else:
1264 heads = list(heads)
1264 heads = list(heads)
1265 if not heads:
1265 if not heads:
1266 return nonodes
1266 return nonodes
1267 ancestors = set()
1267 ancestors = set()
1268 # Turn heads into a dictionary so we can remove 'fake' heads.
1268 # Turn heads into a dictionary so we can remove 'fake' heads.
1269 # Also, later we will be using it to filter out the heads we can't
1269 # Also, later we will be using it to filter out the heads we can't
1270 # find from roots.
1270 # find from roots.
1271 heads = dict.fromkeys(heads, False)
1271 heads = dict.fromkeys(heads, False)
1272 # Start at the top and keep marking parents until we're done.
1272 # Start at the top and keep marking parents until we're done.
1273 nodestotag = set(heads)
1273 nodestotag = set(heads)
1274 # Remember where the top was so we can use it as a limit later.
1274 # Remember where the top was so we can use it as a limit later.
1275 highestrev = max([self.rev(n) for n in nodestotag])
1275 highestrev = max([self.rev(n) for n in nodestotag])
1276 while nodestotag:
1276 while nodestotag:
1277 # grab a node to tag
1277 # grab a node to tag
1278 n = nodestotag.pop()
1278 n = nodestotag.pop()
1279 # Never tag nullid
1279 # Never tag nullid
1280 if n == self.nullid:
1280 if n == self.nullid:
1281 continue
1281 continue
1282 # A node's revision number represents its place in a
1282 # A node's revision number represents its place in a
1283 # topologically sorted list of nodes.
1283 # topologically sorted list of nodes.
1284 r = self.rev(n)
1284 r = self.rev(n)
1285 if r >= lowestrev:
1285 if r >= lowestrev:
1286 if n not in ancestors:
1286 if n not in ancestors:
1287 # If we are possibly a descendant of one of the roots
1287 # If we are possibly a descendant of one of the roots
1288 # and we haven't already been marked as an ancestor
1288 # and we haven't already been marked as an ancestor
1289 ancestors.add(n) # Mark as ancestor
1289 ancestors.add(n) # Mark as ancestor
1290 # Add non-nullid parents to list of nodes to tag.
1290 # Add non-nullid parents to list of nodes to tag.
1291 nodestotag.update(
1291 nodestotag.update(
1292 [p for p in self.parents(n) if p != self.nullid]
1292 [p for p in self.parents(n) if p != self.nullid]
1293 )
1293 )
1294 elif n in heads: # We've seen it before, is it a fake head?
1294 elif n in heads: # We've seen it before, is it a fake head?
1295 # So it is, real heads should not be the ancestors of
1295 # So it is, real heads should not be the ancestors of
1296 # any other heads.
1296 # any other heads.
1297 heads.pop(n)
1297 heads.pop(n)
1298 if not ancestors:
1298 if not ancestors:
1299 return nonodes
1299 return nonodes
1300 # Now that we have our set of ancestors, we want to remove any
1300 # Now that we have our set of ancestors, we want to remove any
1301 # roots that are not ancestors.
1301 # roots that are not ancestors.
1302
1302
1303 # If one of the roots was nullid, everything is included anyway.
1303 # If one of the roots was nullid, everything is included anyway.
1304 if lowestrev > nullrev:
1304 if lowestrev > nullrev:
1305 # But, since we weren't, let's recompute the lowest rev to not
1305 # But, since we weren't, let's recompute the lowest rev to not
1306 # include roots that aren't ancestors.
1306 # include roots that aren't ancestors.
1307
1307
1308 # Filter out roots that aren't ancestors of heads
1308 # Filter out roots that aren't ancestors of heads
1309 roots = [root for root in roots if root in ancestors]
1309 roots = [root for root in roots if root in ancestors]
1310 # Recompute the lowest revision
1310 # Recompute the lowest revision
1311 if roots:
1311 if roots:
1312 lowestrev = min([self.rev(root) for root in roots])
1312 lowestrev = min([self.rev(root) for root in roots])
1313 else:
1313 else:
1314 # No more roots? Return empty list
1314 # No more roots? Return empty list
1315 return nonodes
1315 return nonodes
1316 else:
1316 else:
1317 # We are descending from nullid, and don't need to care about
1317 # We are descending from nullid, and don't need to care about
1318 # any other roots.
1318 # any other roots.
1319 lowestrev = nullrev
1319 lowestrev = nullrev
1320 roots = [self.nullid]
1320 roots = [self.nullid]
1321 # Transform our roots list into a set.
1321 # Transform our roots list into a set.
1322 descendants = set(roots)
1322 descendants = set(roots)
1323 # Also, keep the original roots so we can filter out roots that aren't
1323 # Also, keep the original roots so we can filter out roots that aren't
1324 # 'real' roots (i.e. are descended from other roots).
1324 # 'real' roots (i.e. are descended from other roots).
1325 roots = descendants.copy()
1325 roots = descendants.copy()
1326 # Our topologically sorted list of output nodes.
1326 # Our topologically sorted list of output nodes.
1327 orderedout = []
1327 orderedout = []
1328 # Don't start at nullid since we don't want nullid in our output list,
1328 # Don't start at nullid since we don't want nullid in our output list,
1329 # and if nullid shows up in descendants, empty parents will look like
1329 # and if nullid shows up in descendants, empty parents will look like
1330 # they're descendants.
1330 # they're descendants.
1331 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1331 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1332 n = self.node(r)
1332 n = self.node(r)
1333 isdescendant = False
1333 isdescendant = False
1334 if lowestrev == nullrev: # Everybody is a descendant of nullid
1334 if lowestrev == nullrev: # Everybody is a descendant of nullid
1335 isdescendant = True
1335 isdescendant = True
1336 elif n in descendants:
1336 elif n in descendants:
1337 # n is already a descendant
1337 # n is already a descendant
1338 isdescendant = True
1338 isdescendant = True
1339 # This check only needs to be done here because all the roots
1339 # This check only needs to be done here because all the roots
1340 # will start being marked is descendants before the loop.
1340 # will start being marked is descendants before the loop.
1341 if n in roots:
1341 if n in roots:
1342 # If n was a root, check if it's a 'real' root.
1342 # If n was a root, check if it's a 'real' root.
1343 p = tuple(self.parents(n))
1343 p = tuple(self.parents(n))
1344 # If any of its parents are descendants, it's not a root.
1344 # If any of its parents are descendants, it's not a root.
1345 if (p[0] in descendants) or (p[1] in descendants):
1345 if (p[0] in descendants) or (p[1] in descendants):
1346 roots.remove(n)
1346 roots.remove(n)
1347 else:
1347 else:
1348 p = tuple(self.parents(n))
1348 p = tuple(self.parents(n))
1349 # A node is a descendant if either of its parents are
1349 # A node is a descendant if either of its parents are
1350 # descendants. (We seeded the dependents list with the roots
1350 # descendants. (We seeded the dependents list with the roots
1351 # up there, remember?)
1351 # up there, remember?)
1352 if (p[0] in descendants) or (p[1] in descendants):
1352 if (p[0] in descendants) or (p[1] in descendants):
1353 descendants.add(n)
1353 descendants.add(n)
1354 isdescendant = True
1354 isdescendant = True
1355 if isdescendant and ((ancestors is None) or (n in ancestors)):
1355 if isdescendant and ((ancestors is None) or (n in ancestors)):
1356 # Only include nodes that are both descendants and ancestors.
1356 # Only include nodes that are both descendants and ancestors.
1357 orderedout.append(n)
1357 orderedout.append(n)
1358 if (ancestors is not None) and (n in heads):
1358 if (ancestors is not None) and (n in heads):
1359 # We're trying to figure out which heads are reachable
1359 # We're trying to figure out which heads are reachable
1360 # from roots.
1360 # from roots.
1361 # Mark this head as having been reached
1361 # Mark this head as having been reached
1362 heads[n] = True
1362 heads[n] = True
1363 elif ancestors is None:
1363 elif ancestors is None:
1364 # Otherwise, we're trying to discover the heads.
1364 # Otherwise, we're trying to discover the heads.
1365 # Assume this is a head because if it isn't, the next step
1365 # Assume this is a head because if it isn't, the next step
1366 # will eventually remove it.
1366 # will eventually remove it.
1367 heads[n] = True
1367 heads[n] = True
1368 # But, obviously its parents aren't.
1368 # But, obviously its parents aren't.
1369 for p in self.parents(n):
1369 for p in self.parents(n):
1370 heads.pop(p, None)
1370 heads.pop(p, None)
1371 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1371 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1372 roots = list(roots)
1372 roots = list(roots)
1373 assert orderedout
1373 assert orderedout
1374 assert roots
1374 assert roots
1375 assert heads
1375 assert heads
1376 return (orderedout, roots, heads)
1376 return (orderedout, roots, heads)
1377
1377
1378 def headrevs(self, revs=None):
1378 def headrevs(self, revs=None):
1379 if revs is None:
1379 if revs is None:
1380 try:
1380 try:
1381 return self.index.headrevs()
1381 return self.index.headrevs()
1382 except AttributeError:
1382 except AttributeError:
1383 return self._headrevs()
1383 return self._headrevs()
1384 if rustdagop is not None and self.index.rust_ext_compat:
1384 if rustdagop is not None and self.index.rust_ext_compat:
1385 return rustdagop.headrevs(self.index, revs)
1385 return rustdagop.headrevs(self.index, revs)
1386 return dagop.headrevs(revs, self._uncheckedparentrevs)
1386 return dagop.headrevs(revs, self._uncheckedparentrevs)
1387
1387
1388 def computephases(self, roots):
1388 def computephases(self, roots):
1389 return self.index.computephasesmapsets(roots)
1389 return self.index.computephasesmapsets(roots)
1390
1390
1391 def _headrevs(self):
1391 def _headrevs(self):
1392 count = len(self)
1392 count = len(self)
1393 if not count:
1393 if not count:
1394 return [nullrev]
1394 return [nullrev]
1395 # we won't iter over filtered rev so nobody is a head at start
1395 # we won't iter over filtered rev so nobody is a head at start
1396 ishead = [0] * (count + 1)
1396 ishead = [0] * (count + 1)
1397 index = self.index
1397 index = self.index
1398 for r in self:
1398 for r in self:
1399 ishead[r] = 1 # I may be an head
1399 ishead[r] = 1 # I may be an head
1400 e = index[r]
1400 e = index[r]
1401 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1401 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1402 return [r for r, val in enumerate(ishead) if val]
1402 return [r for r, val in enumerate(ishead) if val]
1403
1403
1404 def heads(self, start=None, stop=None):
1404 def heads(self, start=None, stop=None):
1405 """return the list of all nodes that have no children
1405 """return the list of all nodes that have no children
1406
1406
1407 if start is specified, only heads that are descendants of
1407 if start is specified, only heads that are descendants of
1408 start will be returned
1408 start will be returned
1409 if stop is specified, it will consider all the revs from stop
1409 if stop is specified, it will consider all the revs from stop
1410 as if they had no children
1410 as if they had no children
1411 """
1411 """
1412 if start is None and stop is None:
1412 if start is None and stop is None:
1413 if not len(self):
1413 if not len(self):
1414 return [self.nullid]
1414 return [self.nullid]
1415 return [self.node(r) for r in self.headrevs()]
1415 return [self.node(r) for r in self.headrevs()]
1416
1416
1417 if start is None:
1417 if start is None:
1418 start = nullrev
1418 start = nullrev
1419 else:
1419 else:
1420 start = self.rev(start)
1420 start = self.rev(start)
1421
1421
1422 stoprevs = {self.rev(n) for n in stop or []}
1422 stoprevs = {self.rev(n) for n in stop or []}
1423
1423
1424 revs = dagop.headrevssubset(
1424 revs = dagop.headrevssubset(
1425 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1425 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1426 )
1426 )
1427
1427
1428 return [self.node(rev) for rev in revs]
1428 return [self.node(rev) for rev in revs]
1429
1429
1430 def children(self, node):
1430 def children(self, node):
1431 """find the children of a given node"""
1431 """find the children of a given node"""
1432 c = []
1432 c = []
1433 p = self.rev(node)
1433 p = self.rev(node)
1434 for r in self.revs(start=p + 1):
1434 for r in self.revs(start=p + 1):
1435 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1435 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1436 if prevs:
1436 if prevs:
1437 for pr in prevs:
1437 for pr in prevs:
1438 if pr == p:
1438 if pr == p:
1439 c.append(self.node(r))
1439 c.append(self.node(r))
1440 elif p == nullrev:
1440 elif p == nullrev:
1441 c.append(self.node(r))
1441 c.append(self.node(r))
1442 return c
1442 return c
1443
1443
1444 def commonancestorsheads(self, a, b):
1444 def commonancestorsheads(self, a, b):
1445 """calculate all the heads of the common ancestors of nodes a and b"""
1445 """calculate all the heads of the common ancestors of nodes a and b"""
1446 a, b = self.rev(a), self.rev(b)
1446 a, b = self.rev(a), self.rev(b)
1447 ancs = self._commonancestorsheads(a, b)
1447 ancs = self._commonancestorsheads(a, b)
1448 return pycompat.maplist(self.node, ancs)
1448 return pycompat.maplist(self.node, ancs)
1449
1449
1450 def _commonancestorsheads(self, *revs):
1450 def _commonancestorsheads(self, *revs):
1451 """calculate all the heads of the common ancestors of revs"""
1451 """calculate all the heads of the common ancestors of revs"""
1452 try:
1452 try:
1453 ancs = self.index.commonancestorsheads(*revs)
1453 ancs = self.index.commonancestorsheads(*revs)
1454 except (AttributeError, OverflowError): # C implementation failed
1454 except (AttributeError, OverflowError): # C implementation failed
1455 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1455 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1456 return ancs
1456 return ancs
1457
1457
1458 def isancestor(self, a, b):
1458 def isancestor(self, a, b):
1459 """return True if node a is an ancestor of node b
1459 """return True if node a is an ancestor of node b
1460
1460
1461 A revision is considered an ancestor of itself."""
1461 A revision is considered an ancestor of itself."""
1462 a, b = self.rev(a), self.rev(b)
1462 a, b = self.rev(a), self.rev(b)
1463 return self.isancestorrev(a, b)
1463 return self.isancestorrev(a, b)
1464
1464
1465 def isancestorrev(self, a, b):
1465 def isancestorrev(self, a, b):
1466 """return True if revision a is an ancestor of revision b
1466 """return True if revision a is an ancestor of revision b
1467
1467
1468 A revision is considered an ancestor of itself.
1468 A revision is considered an ancestor of itself.
1469
1469
1470 The implementation of this is trivial but the use of
1470 The implementation of this is trivial but the use of
1471 reachableroots is not."""
1471 reachableroots is not."""
1472 if a == nullrev:
1472 if a == nullrev:
1473 return True
1473 return True
1474 elif a == b:
1474 elif a == b:
1475 return True
1475 return True
1476 elif a > b:
1476 elif a > b:
1477 return False
1477 return False
1478 return bool(self.reachableroots(a, [b], [a], includepath=False))
1478 return bool(self.reachableroots(a, [b], [a], includepath=False))
1479
1479
1480 def reachableroots(self, minroot, heads, roots, includepath=False):
1480 def reachableroots(self, minroot, heads, roots, includepath=False):
1481 """return (heads(::(<roots> and <roots>::<heads>)))
1481 """return (heads(::(<roots> and <roots>::<heads>)))
1482
1482
1483 If includepath is True, return (<roots>::<heads>)."""
1483 If includepath is True, return (<roots>::<heads>)."""
1484 try:
1484 try:
1485 return self.index.reachableroots2(
1485 return self.index.reachableroots2(
1486 minroot, heads, roots, includepath
1486 minroot, heads, roots, includepath
1487 )
1487 )
1488 except AttributeError:
1488 except AttributeError:
1489 return dagop._reachablerootspure(
1489 return dagop._reachablerootspure(
1490 self.parentrevs, minroot, roots, heads, includepath
1490 self.parentrevs, minroot, roots, heads, includepath
1491 )
1491 )
1492
1492
1493 def ancestor(self, a, b):
1493 def ancestor(self, a, b):
1494 """calculate the "best" common ancestor of nodes a and b"""
1494 """calculate the "best" common ancestor of nodes a and b"""
1495
1495
1496 a, b = self.rev(a), self.rev(b)
1496 a, b = self.rev(a), self.rev(b)
1497 try:
1497 try:
1498 ancs = self.index.ancestors(a, b)
1498 ancs = self.index.ancestors(a, b)
1499 except (AttributeError, OverflowError):
1499 except (AttributeError, OverflowError):
1500 ancs = ancestor.ancestors(self.parentrevs, a, b)
1500 ancs = ancestor.ancestors(self.parentrevs, a, b)
1501 if ancs:
1501 if ancs:
1502 # choose a consistent winner when there's a tie
1502 # choose a consistent winner when there's a tie
1503 return min(map(self.node, ancs))
1503 return min(map(self.node, ancs))
1504 return self.nullid
1504 return self.nullid
1505
1505
1506 def _match(self, id):
1506 def _match(self, id):
1507 if isinstance(id, int):
1507 if isinstance(id, int):
1508 # rev
1508 # rev
1509 return self.node(id)
1509 return self.node(id)
1510 if len(id) == self.nodeconstants.nodelen:
1510 if len(id) == self.nodeconstants.nodelen:
1511 # possibly a binary node
1511 # possibly a binary node
1512 # odds of a binary node being all hex in ASCII are 1 in 10**25
1512 # odds of a binary node being all hex in ASCII are 1 in 10**25
1513 try:
1513 try:
1514 node = id
1514 node = id
1515 self.rev(node) # quick search the index
1515 self.rev(node) # quick search the index
1516 return node
1516 return node
1517 except error.LookupError:
1517 except error.LookupError:
1518 pass # may be partial hex id
1518 pass # may be partial hex id
1519 try:
1519 try:
1520 # str(rev)
1520 # str(rev)
1521 rev = int(id)
1521 rev = int(id)
1522 if b"%d" % rev != id:
1522 if b"%d" % rev != id:
1523 raise ValueError
1523 raise ValueError
1524 if rev < 0:
1524 if rev < 0:
1525 rev = len(self) + rev
1525 rev = len(self) + rev
1526 if rev < 0 or rev >= len(self):
1526 if rev < 0 or rev >= len(self):
1527 raise ValueError
1527 raise ValueError
1528 return self.node(rev)
1528 return self.node(rev)
1529 except (ValueError, OverflowError):
1529 except (ValueError, OverflowError):
1530 pass
1530 pass
1531 if len(id) == 2 * self.nodeconstants.nodelen:
1531 if len(id) == 2 * self.nodeconstants.nodelen:
1532 try:
1532 try:
1533 # a full hex nodeid?
1533 # a full hex nodeid?
1534 node = bin(id)
1534 node = bin(id)
1535 self.rev(node)
1535 self.rev(node)
1536 return node
1536 return node
1537 except (TypeError, error.LookupError):
1537 except (TypeError, error.LookupError):
1538 pass
1538 pass
1539
1539
1540 def _partialmatch(self, id):
1540 def _partialmatch(self, id):
1541 # we don't care wdirfilenodeids as they should be always full hash
1541 # we don't care wdirfilenodeids as they should be always full hash
1542 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1542 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1543 ambiguous = False
1543 ambiguous = False
1544 try:
1544 try:
1545 partial = self.index.partialmatch(id)
1545 partial = self.index.partialmatch(id)
1546 if partial and self.hasnode(partial):
1546 if partial and self.hasnode(partial):
1547 if maybewdir:
1547 if maybewdir:
1548 # single 'ff...' match in radix tree, ambiguous with wdir
1548 # single 'ff...' match in radix tree, ambiguous with wdir
1549 ambiguous = True
1549 ambiguous = True
1550 else:
1550 else:
1551 return partial
1551 return partial
1552 elif maybewdir:
1552 elif maybewdir:
1553 # no 'ff...' match in radix tree, wdir identified
1553 # no 'ff...' match in radix tree, wdir identified
1554 raise error.WdirUnsupported
1554 raise error.WdirUnsupported
1555 else:
1555 else:
1556 return None
1556 return None
1557 except error.RevlogError:
1557 except error.RevlogError:
1558 # parsers.c radix tree lookup gave multiple matches
1558 # parsers.c radix tree lookup gave multiple matches
1559 # fast path: for unfiltered changelog, radix tree is accurate
1559 # fast path: for unfiltered changelog, radix tree is accurate
1560 if not getattr(self, 'filteredrevs', None):
1560 if not getattr(self, 'filteredrevs', None):
1561 ambiguous = True
1561 ambiguous = True
1562 # fall through to slow path that filters hidden revisions
1562 # fall through to slow path that filters hidden revisions
1563 except (AttributeError, ValueError):
1563 except (AttributeError, ValueError):
1564 # we are pure python, or key was too short to search radix tree
1564 # we are pure python, or key was too short to search radix tree
1565 pass
1565 pass
1566 if ambiguous:
1566 if ambiguous:
1567 raise error.AmbiguousPrefixLookupError(
1567 raise error.AmbiguousPrefixLookupError(
1568 id, self.display_id, _(b'ambiguous identifier')
1568 id, self.display_id, _(b'ambiguous identifier')
1569 )
1569 )
1570
1570
1571 if id in self._pcache:
1571 if id in self._pcache:
1572 return self._pcache[id]
1572 return self._pcache[id]
1573
1573
1574 if len(id) <= 40:
1574 if len(id) <= 40:
1575 try:
1575 try:
1576 # hex(node)[:...]
1576 # hex(node)[:...]
1577 l = len(id) // 2 # grab an even number of digits
1577 l = len(id) // 2 # grab an even number of digits
1578 prefix = bin(id[: l * 2])
1578 prefix = bin(id[: l * 2])
1579 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1579 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1580 nl = [
1580 nl = [
1581 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1581 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1582 ]
1582 ]
1583 if self.nodeconstants.nullhex.startswith(id):
1583 if self.nodeconstants.nullhex.startswith(id):
1584 nl.append(self.nullid)
1584 nl.append(self.nullid)
1585 if len(nl) > 0:
1585 if len(nl) > 0:
1586 if len(nl) == 1 and not maybewdir:
1586 if len(nl) == 1 and not maybewdir:
1587 self._pcache[id] = nl[0]
1587 self._pcache[id] = nl[0]
1588 return nl[0]
1588 return nl[0]
1589 raise error.AmbiguousPrefixLookupError(
1589 raise error.AmbiguousPrefixLookupError(
1590 id, self.display_id, _(b'ambiguous identifier')
1590 id, self.display_id, _(b'ambiguous identifier')
1591 )
1591 )
1592 if maybewdir:
1592 if maybewdir:
1593 raise error.WdirUnsupported
1593 raise error.WdirUnsupported
1594 return None
1594 return None
1595 except TypeError:
1595 except TypeError:
1596 pass
1596 pass
1597
1597
1598 def lookup(self, id):
1598 def lookup(self, id):
1599 """locate a node based on:
1599 """locate a node based on:
1600 - revision number or str(revision number)
1600 - revision number or str(revision number)
1601 - nodeid or subset of hex nodeid
1601 - nodeid or subset of hex nodeid
1602 """
1602 """
1603 n = self._match(id)
1603 n = self._match(id)
1604 if n is not None:
1604 if n is not None:
1605 return n
1605 return n
1606 n = self._partialmatch(id)
1606 n = self._partialmatch(id)
1607 if n:
1607 if n:
1608 return n
1608 return n
1609
1609
1610 raise error.LookupError(id, self.display_id, _(b'no match found'))
1610 raise error.LookupError(id, self.display_id, _(b'no match found'))
1611
1611
1612 def shortest(self, node, minlength=1):
1612 def shortest(self, node, minlength=1):
1613 """Find the shortest unambiguous prefix that matches node."""
1613 """Find the shortest unambiguous prefix that matches node."""
1614
1614
1615 def isvalid(prefix):
1615 def isvalid(prefix):
1616 try:
1616 try:
1617 matchednode = self._partialmatch(prefix)
1617 matchednode = self._partialmatch(prefix)
1618 except error.AmbiguousPrefixLookupError:
1618 except error.AmbiguousPrefixLookupError:
1619 return False
1619 return False
1620 except error.WdirUnsupported:
1620 except error.WdirUnsupported:
1621 # single 'ff...' match
1621 # single 'ff...' match
1622 return True
1622 return True
1623 if matchednode is None:
1623 if matchednode is None:
1624 raise error.LookupError(node, self.display_id, _(b'no node'))
1624 raise error.LookupError(node, self.display_id, _(b'no node'))
1625 return True
1625 return True
1626
1626
1627 def maybewdir(prefix):
1627 def maybewdir(prefix):
1628 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1628 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1629
1629
1630 hexnode = hex(node)
1630 hexnode = hex(node)
1631
1631
1632 def disambiguate(hexnode, minlength):
1632 def disambiguate(hexnode, minlength):
1633 """Disambiguate against wdirid."""
1633 """Disambiguate against wdirid."""
1634 for length in range(minlength, len(hexnode) + 1):
1634 for length in range(minlength, len(hexnode) + 1):
1635 prefix = hexnode[:length]
1635 prefix = hexnode[:length]
1636 if not maybewdir(prefix):
1636 if not maybewdir(prefix):
1637 return prefix
1637 return prefix
1638
1638
1639 if not getattr(self, 'filteredrevs', None):
1639 if not getattr(self, 'filteredrevs', None):
1640 try:
1640 try:
1641 length = max(self.index.shortest(node), minlength)
1641 length = max(self.index.shortest(node), minlength)
1642 return disambiguate(hexnode, length)
1642 return disambiguate(hexnode, length)
1643 except error.RevlogError:
1643 except error.RevlogError:
1644 if node != self.nodeconstants.wdirid:
1644 if node != self.nodeconstants.wdirid:
1645 raise error.LookupError(
1645 raise error.LookupError(
1646 node, self.display_id, _(b'no node')
1646 node, self.display_id, _(b'no node')
1647 )
1647 )
1648 except AttributeError:
1648 except AttributeError:
1649 # Fall through to pure code
1649 # Fall through to pure code
1650 pass
1650 pass
1651
1651
1652 if node == self.nodeconstants.wdirid:
1652 if node == self.nodeconstants.wdirid:
1653 for length in range(minlength, len(hexnode) + 1):
1653 for length in range(minlength, len(hexnode) + 1):
1654 prefix = hexnode[:length]
1654 prefix = hexnode[:length]
1655 if isvalid(prefix):
1655 if isvalid(prefix):
1656 return prefix
1656 return prefix
1657
1657
1658 for length in range(minlength, len(hexnode) + 1):
1658 for length in range(minlength, len(hexnode) + 1):
1659 prefix = hexnode[:length]
1659 prefix = hexnode[:length]
1660 if isvalid(prefix):
1660 if isvalid(prefix):
1661 return disambiguate(hexnode, length)
1661 return disambiguate(hexnode, length)
1662
1662
1663 def cmp(self, node, text):
1663 def cmp(self, node, text):
1664 """compare text with a given file revision
1664 """compare text with a given file revision
1665
1665
1666 returns True if text is different than what is stored.
1666 returns True if text is different than what is stored.
1667 """
1667 """
1668 p1, p2 = self.parents(node)
1668 p1, p2 = self.parents(node)
1669 return storageutil.hashrevisionsha1(text, p1, p2) != node
1669 return storageutil.hashrevisionsha1(text, p1, p2) != node
1670
1670
1671 def _cachesegment(self, offset, data):
1671 def _cachesegment(self, offset, data):
1672 """Add a segment to the revlog cache.
1672 """Add a segment to the revlog cache.
1673
1673
1674 Accepts an absolute offset and the data that is at that location.
1674 Accepts an absolute offset and the data that is at that location.
1675 """
1675 """
1676 o, d = self._chunkcache
1676 o, d = self._chunkcache
1677 # try to add to existing cache
1677 # try to add to existing cache
1678 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1678 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1679 self._chunkcache = o, d + data
1679 self._chunkcache = o, d + data
1680 else:
1680 else:
1681 self._chunkcache = offset, data
1681 self._chunkcache = offset, data
1682
1682
1683 def _readsegment(self, offset, length, df=None):
1683 def _readsegment(self, offset, length, df=None):
1684 """Load a segment of raw data from the revlog.
1684 """Load a segment of raw data from the revlog.
1685
1685
1686 Accepts an absolute offset, length to read, and an optional existing
1686 Accepts an absolute offset, length to read, and an optional existing
1687 file handle to read from.
1687 file handle to read from.
1688
1688
1689 If an existing file handle is passed, it will be seeked and the
1689 If an existing file handle is passed, it will be seeked and the
1690 original seek position will NOT be restored.
1690 original seek position will NOT be restored.
1691
1691
1692 Returns a str or buffer of raw byte data.
1692 Returns a str or buffer of raw byte data.
1693
1693
1694 Raises if the requested number of bytes could not be read.
1694 Raises if the requested number of bytes could not be read.
1695 """
1695 """
1696 # Cache data both forward and backward around the requested
1696 # Cache data both forward and backward around the requested
1697 # data, in a fixed size window. This helps speed up operations
1697 # data, in a fixed size window. This helps speed up operations
1698 # involving reading the revlog backwards.
1698 # involving reading the revlog backwards.
1699 cachesize = self._chunkcachesize
1699 cachesize = self._chunkcachesize
1700 realoffset = offset & ~(cachesize - 1)
1700 realoffset = offset & ~(cachesize - 1)
1701 reallength = (
1701 reallength = (
1702 (offset + length + cachesize) & ~(cachesize - 1)
1702 (offset + length + cachesize) & ~(cachesize - 1)
1703 ) - realoffset
1703 ) - realoffset
1704 with self._datareadfp(df) as df:
1704 with self._datareadfp(df) as df:
1705 df.seek(realoffset)
1705 df.seek(realoffset)
1706 d = df.read(reallength)
1706 d = df.read(reallength)
1707
1707
1708 self._cachesegment(realoffset, d)
1708 self._cachesegment(realoffset, d)
1709 if offset != realoffset or reallength != length:
1709 if offset != realoffset or reallength != length:
1710 startoffset = offset - realoffset
1710 startoffset = offset - realoffset
1711 if len(d) - startoffset < length:
1711 if len(d) - startoffset < length:
1712 raise error.RevlogError(
1712 raise error.RevlogError(
1713 _(
1713 _(
1714 b'partial read of revlog %s; expected %d bytes from '
1714 b'partial read of revlog %s; expected %d bytes from '
1715 b'offset %d, got %d'
1715 b'offset %d, got %d'
1716 )
1716 )
1717 % (
1717 % (
1718 self._indexfile if self._inline else self._datafile,
1718 self._indexfile if self._inline else self._datafile,
1719 length,
1719 length,
1720 offset,
1720 offset,
1721 len(d) - startoffset,
1721 len(d) - startoffset,
1722 )
1722 )
1723 )
1723 )
1724
1724
1725 return util.buffer(d, startoffset, length)
1725 return util.buffer(d, startoffset, length)
1726
1726
1727 if len(d) < length:
1727 if len(d) < length:
1728 raise error.RevlogError(
1728 raise error.RevlogError(
1729 _(
1729 _(
1730 b'partial read of revlog %s; expected %d bytes from offset '
1730 b'partial read of revlog %s; expected %d bytes from offset '
1731 b'%d, got %d'
1731 b'%d, got %d'
1732 )
1732 )
1733 % (
1733 % (
1734 self._indexfile if self._inline else self._datafile,
1734 self._indexfile if self._inline else self._datafile,
1735 length,
1735 length,
1736 offset,
1736 offset,
1737 len(d),
1737 len(d),
1738 )
1738 )
1739 )
1739 )
1740
1740
1741 return d
1741 return d
1742
1742
1743 def _getsegment(self, offset, length, df=None):
1743 def _getsegment(self, offset, length, df=None):
1744 """Obtain a segment of raw data from the revlog.
1744 """Obtain a segment of raw data from the revlog.
1745
1745
1746 Accepts an absolute offset, length of bytes to obtain, and an
1746 Accepts an absolute offset, length of bytes to obtain, and an
1747 optional file handle to the already-opened revlog. If the file
1747 optional file handle to the already-opened revlog. If the file
1748 handle is used, it's original seek position will not be preserved.
1748 handle is used, it's original seek position will not be preserved.
1749
1749
1750 Requests for data may be returned from a cache.
1750 Requests for data may be returned from a cache.
1751
1751
1752 Returns a str or a buffer instance of raw byte data.
1752 Returns a str or a buffer instance of raw byte data.
1753 """
1753 """
1754 o, d = self._chunkcache
1754 o, d = self._chunkcache
1755 l = len(d)
1755 l = len(d)
1756
1756
1757 # is it in the cache?
1757 # is it in the cache?
1758 cachestart = offset - o
1758 cachestart = offset - o
1759 cacheend = cachestart + length
1759 cacheend = cachestart + length
1760 if cachestart >= 0 and cacheend <= l:
1760 if cachestart >= 0 and cacheend <= l:
1761 if cachestart == 0 and cacheend == l:
1761 if cachestart == 0 and cacheend == l:
1762 return d # avoid a copy
1762 return d # avoid a copy
1763 return util.buffer(d, cachestart, cacheend - cachestart)
1763 return util.buffer(d, cachestart, cacheend - cachestart)
1764
1764
1765 return self._readsegment(offset, length, df=df)
1765 return self._readsegment(offset, length, df=df)
1766
1766
1767 def _getsegmentforrevs(self, startrev, endrev, df=None):
1767 def _getsegmentforrevs(self, startrev, endrev, df=None):
1768 """Obtain a segment of raw data corresponding to a range of revisions.
1768 """Obtain a segment of raw data corresponding to a range of revisions.
1769
1769
1770 Accepts the start and end revisions and an optional already-open
1770 Accepts the start and end revisions and an optional already-open
1771 file handle to be used for reading. If the file handle is read, its
1771 file handle to be used for reading. If the file handle is read, its
1772 seek position will not be preserved.
1772 seek position will not be preserved.
1773
1773
1774 Requests for data may be satisfied by a cache.
1774 Requests for data may be satisfied by a cache.
1775
1775
1776 Returns a 2-tuple of (offset, data) for the requested range of
1776 Returns a 2-tuple of (offset, data) for the requested range of
1777 revisions. Offset is the integer offset from the beginning of the
1777 revisions. Offset is the integer offset from the beginning of the
1778 revlog and data is a str or buffer of the raw byte data.
1778 revlog and data is a str or buffer of the raw byte data.
1779
1779
1780 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1780 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1781 to determine where each revision's data begins and ends.
1781 to determine where each revision's data begins and ends.
1782 """
1782 """
1783 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1783 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1784 # (functions are expensive).
1784 # (functions are expensive).
1785 index = self.index
1785 index = self.index
1786 istart = index[startrev]
1786 istart = index[startrev]
1787 start = int(istart[0] >> 16)
1787 start = int(istart[0] >> 16)
1788 if startrev == endrev:
1788 if startrev == endrev:
1789 end = start + istart[1]
1789 end = start + istart[1]
1790 else:
1790 else:
1791 iend = index[endrev]
1791 iend = index[endrev]
1792 end = int(iend[0] >> 16) + iend[1]
1792 end = int(iend[0] >> 16) + iend[1]
1793
1793
1794 if self._inline:
1794 if self._inline:
1795 start += (startrev + 1) * self.index.entry_size
1795 start += (startrev + 1) * self.index.entry_size
1796 end += (endrev + 1) * self.index.entry_size
1796 end += (endrev + 1) * self.index.entry_size
1797 length = end - start
1797 length = end - start
1798
1798
1799 return start, self._getsegment(start, length, df=df)
1799 return start, self._getsegment(start, length, df=df)
1800
1800
1801 def _chunk(self, rev, df=None):
1801 def _chunk(self, rev, df=None):
1802 """Obtain a single decompressed chunk for a revision.
1802 """Obtain a single decompressed chunk for a revision.
1803
1803
1804 Accepts an integer revision and an optional already-open file handle
1804 Accepts an integer revision and an optional already-open file handle
1805 to be used for reading. If used, the seek position of the file will not
1805 to be used for reading. If used, the seek position of the file will not
1806 be preserved.
1806 be preserved.
1807
1807
1808 Returns a str holding uncompressed data for the requested revision.
1808 Returns a str holding uncompressed data for the requested revision.
1809 """
1809 """
1810 compression_mode = self.index[rev][10]
1810 compression_mode = self.index[rev][10]
1811 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1811 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1812 if compression_mode == COMP_MODE_PLAIN:
1812 if compression_mode == COMP_MODE_PLAIN:
1813 return data
1813 return data
1814 elif compression_mode == COMP_MODE_DEFAULT:
1814 elif compression_mode == COMP_MODE_DEFAULT:
1815 return self._decompressor(data)
1815 return self._decompressor(data)
1816 elif compression_mode == COMP_MODE_INLINE:
1816 elif compression_mode == COMP_MODE_INLINE:
1817 return self.decompress(data)
1817 return self.decompress(data)
1818 else:
1818 else:
1819 msg = 'unknown compression mode %d'
1819 msg = 'unknown compression mode %d'
1820 msg %= compression_mode
1820 msg %= compression_mode
1821 raise error.RevlogError(msg)
1821 raise error.RevlogError(msg)
1822
1822
1823 def _chunks(self, revs, df=None, targetsize=None):
1823 def _chunks(self, revs, df=None, targetsize=None):
1824 """Obtain decompressed chunks for the specified revisions.
1824 """Obtain decompressed chunks for the specified revisions.
1825
1825
1826 Accepts an iterable of numeric revisions that are assumed to be in
1826 Accepts an iterable of numeric revisions that are assumed to be in
1827 ascending order. Also accepts an optional already-open file handle
1827 ascending order. Also accepts an optional already-open file handle
1828 to be used for reading. If used, the seek position of the file will
1828 to be used for reading. If used, the seek position of the file will
1829 not be preserved.
1829 not be preserved.
1830
1830
1831 This function is similar to calling ``self._chunk()`` multiple times,
1831 This function is similar to calling ``self._chunk()`` multiple times,
1832 but is faster.
1832 but is faster.
1833
1833
1834 Returns a list with decompressed data for each requested revision.
1834 Returns a list with decompressed data for each requested revision.
1835 """
1835 """
1836 if not revs:
1836 if not revs:
1837 return []
1837 return []
1838 start = self.start
1838 start = self.start
1839 length = self.length
1839 length = self.length
1840 inline = self._inline
1840 inline = self._inline
1841 iosize = self.index.entry_size
1841 iosize = self.index.entry_size
1842 buffer = util.buffer
1842 buffer = util.buffer
1843
1843
1844 l = []
1844 l = []
1845 ladd = l.append
1845 ladd = l.append
1846
1846
1847 if not self._withsparseread:
1847 if not self._withsparseread:
1848 slicedchunks = (revs,)
1848 slicedchunks = (revs,)
1849 else:
1849 else:
1850 slicedchunks = deltautil.slicechunk(
1850 slicedchunks = deltautil.slicechunk(
1851 self, revs, targetsize=targetsize
1851 self, revs, targetsize=targetsize
1852 )
1852 )
1853
1853
1854 for revschunk in slicedchunks:
1854 for revschunk in slicedchunks:
1855 firstrev = revschunk[0]
1855 firstrev = revschunk[0]
1856 # Skip trailing revisions with empty diff
1856 # Skip trailing revisions with empty diff
1857 for lastrev in revschunk[::-1]:
1857 for lastrev in revschunk[::-1]:
1858 if length(lastrev) != 0:
1858 if length(lastrev) != 0:
1859 break
1859 break
1860
1860
1861 try:
1861 try:
1862 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1862 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1863 except OverflowError:
1863 except OverflowError:
1864 # issue4215 - we can't cache a run of chunks greater than
1864 # issue4215 - we can't cache a run of chunks greater than
1865 # 2G on Windows
1865 # 2G on Windows
1866 return [self._chunk(rev, df=df) for rev in revschunk]
1866 return [self._chunk(rev, df=df) for rev in revschunk]
1867
1867
1868 decomp = self.decompress
1868 decomp = self.decompress
1869 # self._decompressor might be None, but will not be used in that case
1869 # self._decompressor might be None, but will not be used in that case
1870 def_decomp = self._decompressor
1870 def_decomp = self._decompressor
1871 for rev in revschunk:
1871 for rev in revschunk:
1872 chunkstart = start(rev)
1872 chunkstart = start(rev)
1873 if inline:
1873 if inline:
1874 chunkstart += (rev + 1) * iosize
1874 chunkstart += (rev + 1) * iosize
1875 chunklength = length(rev)
1875 chunklength = length(rev)
1876 comp_mode = self.index[rev][10]
1876 comp_mode = self.index[rev][10]
1877 c = buffer(data, chunkstart - offset, chunklength)
1877 c = buffer(data, chunkstart - offset, chunklength)
1878 if comp_mode == COMP_MODE_PLAIN:
1878 if comp_mode == COMP_MODE_PLAIN:
1879 ladd(c)
1879 ladd(c)
1880 elif comp_mode == COMP_MODE_INLINE:
1880 elif comp_mode == COMP_MODE_INLINE:
1881 ladd(decomp(c))
1881 ladd(decomp(c))
1882 elif comp_mode == COMP_MODE_DEFAULT:
1882 elif comp_mode == COMP_MODE_DEFAULT:
1883 ladd(def_decomp(c))
1883 ladd(def_decomp(c))
1884 else:
1884 else:
1885 msg = 'unknown compression mode %d'
1885 msg = 'unknown compression mode %d'
1886 msg %= comp_mode
1886 msg %= comp_mode
1887 raise error.RevlogError(msg)
1887 raise error.RevlogError(msg)
1888
1888
1889 return l
1889 return l
1890
1890
1891 def _chunkclear(self):
1891 def _chunkclear(self):
1892 """Clear the raw chunk cache."""
1892 """Clear the raw chunk cache."""
1893 self._chunkcache = (0, b'')
1893 self._chunkcache = (0, b'')
1894
1894
1895 def deltaparent(self, rev):
1895 def deltaparent(self, rev):
1896 """return deltaparent of the given revision"""
1896 """return deltaparent of the given revision"""
1897 base = self.index[rev][3]
1897 base = self.index[rev][3]
1898 if base == rev:
1898 if base == rev:
1899 return nullrev
1899 return nullrev
1900 elif self._generaldelta:
1900 elif self._generaldelta:
1901 return base
1901 return base
1902 else:
1902 else:
1903 return rev - 1
1903 return rev - 1
1904
1904
1905 def issnapshot(self, rev):
1905 def issnapshot(self, rev):
1906 """tells whether rev is a snapshot"""
1906 """tells whether rev is a snapshot"""
1907 if not self._sparserevlog:
1907 if not self._sparserevlog:
1908 return self.deltaparent(rev) == nullrev
1908 return self.deltaparent(rev) == nullrev
1909 elif util.safehasattr(self.index, b'issnapshot'):
1909 elif util.safehasattr(self.index, b'issnapshot'):
1910 # directly assign the method to cache the testing and access
1910 # directly assign the method to cache the testing and access
1911 self.issnapshot = self.index.issnapshot
1911 self.issnapshot = self.index.issnapshot
1912 return self.issnapshot(rev)
1912 return self.issnapshot(rev)
1913 if rev == nullrev:
1913 if rev == nullrev:
1914 return True
1914 return True
1915 entry = self.index[rev]
1915 entry = self.index[rev]
1916 base = entry[3]
1916 base = entry[3]
1917 if base == rev:
1917 if base == rev:
1918 return True
1918 return True
1919 if base == nullrev:
1919 if base == nullrev:
1920 return True
1920 return True
1921 p1 = entry[5]
1921 p1 = entry[5]
1922 p2 = entry[6]
1922 p2 = entry[6]
1923 if base == p1 or base == p2:
1923 if base == p1 or base == p2:
1924 return False
1924 return False
1925 return self.issnapshot(base)
1925 return self.issnapshot(base)
1926
1926
1927 def snapshotdepth(self, rev):
1927 def snapshotdepth(self, rev):
1928 """number of snapshot in the chain before this one"""
1928 """number of snapshot in the chain before this one"""
1929 if not self.issnapshot(rev):
1929 if not self.issnapshot(rev):
1930 raise error.ProgrammingError(b'revision %d not a snapshot')
1930 raise error.ProgrammingError(b'revision %d not a snapshot')
1931 return len(self._deltachain(rev)[0]) - 1
1931 return len(self._deltachain(rev)[0]) - 1
1932
1932
1933 def revdiff(self, rev1, rev2):
1933 def revdiff(self, rev1, rev2):
1934 """return or calculate a delta between two revisions
1934 """return or calculate a delta between two revisions
1935
1935
1936 The delta calculated is in binary form and is intended to be written to
1936 The delta calculated is in binary form and is intended to be written to
1937 revlog data directly. So this function needs raw revision data.
1937 revlog data directly. So this function needs raw revision data.
1938 """
1938 """
1939 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1939 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1940 return bytes(self._chunk(rev2))
1940 return bytes(self._chunk(rev2))
1941
1941
1942 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1942 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1943
1943
1944 def _processflags(self, text, flags, operation, raw=False):
1944 def _processflags(self, text, flags, operation, raw=False):
1945 """deprecated entry point to access flag processors"""
1945 """deprecated entry point to access flag processors"""
1946 msg = b'_processflag(...) use the specialized variant'
1946 msg = b'_processflag(...) use the specialized variant'
1947 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1947 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1948 if raw:
1948 if raw:
1949 return text, flagutil.processflagsraw(self, text, flags)
1949 return text, flagutil.processflagsraw(self, text, flags)
1950 elif operation == b'read':
1950 elif operation == b'read':
1951 return flagutil.processflagsread(self, text, flags)
1951 return flagutil.processflagsread(self, text, flags)
1952 else: # write operation
1952 else: # write operation
1953 return flagutil.processflagswrite(self, text, flags)
1953 return flagutil.processflagswrite(self, text, flags)
1954
1954
1955 def revision(self, nodeorrev, _df=None, raw=False):
1955 def revision(self, nodeorrev, _df=None, raw=False):
1956 """return an uncompressed revision of a given node or revision
1956 """return an uncompressed revision of a given node or revision
1957 number.
1957 number.
1958
1958
1959 _df - an existing file handle to read from. (internal-only)
1959 _df - an existing file handle to read from. (internal-only)
1960 raw - an optional argument specifying if the revision data is to be
1960 raw - an optional argument specifying if the revision data is to be
1961 treated as raw data when applying flag transforms. 'raw' should be set
1961 treated as raw data when applying flag transforms. 'raw' should be set
1962 to True when generating changegroups or in debug commands.
1962 to True when generating changegroups or in debug commands.
1963 """
1963 """
1964 if raw:
1964 if raw:
1965 msg = (
1965 msg = (
1966 b'revlog.revision(..., raw=True) is deprecated, '
1966 b'revlog.revision(..., raw=True) is deprecated, '
1967 b'use revlog.rawdata(...)'
1967 b'use revlog.rawdata(...)'
1968 )
1968 )
1969 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1969 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1970 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1970 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1971
1971
1972 def sidedata(self, nodeorrev, _df=None):
1972 def sidedata(self, nodeorrev, _df=None):
1973 """a map of extra data related to the changeset but not part of the hash
1973 """a map of extra data related to the changeset but not part of the hash
1974
1974
1975 This function currently return a dictionary. However, more advanced
1975 This function currently return a dictionary. However, more advanced
1976 mapping object will likely be used in the future for a more
1976 mapping object will likely be used in the future for a more
1977 efficient/lazy code.
1977 efficient/lazy code.
1978 """
1978 """
1979 return self._revisiondata(nodeorrev, _df)[1]
1979 return self._revisiondata(nodeorrev, _df)[1]
1980
1980
1981 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1981 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1982 # deal with <nodeorrev> argument type
1982 # deal with <nodeorrev> argument type
1983 if isinstance(nodeorrev, int):
1983 if isinstance(nodeorrev, int):
1984 rev = nodeorrev
1984 rev = nodeorrev
1985 node = self.node(rev)
1985 node = self.node(rev)
1986 else:
1986 else:
1987 node = nodeorrev
1987 node = nodeorrev
1988 rev = None
1988 rev = None
1989
1989
1990 # fast path the special `nullid` rev
1990 # fast path the special `nullid` rev
1991 if node == self.nullid:
1991 if node == self.nullid:
1992 return b"", {}
1992 return b"", {}
1993
1993
1994 # ``rawtext`` is the text as stored inside the revlog. Might be the
1994 # ``rawtext`` is the text as stored inside the revlog. Might be the
1995 # revision or might need to be processed to retrieve the revision.
1995 # revision or might need to be processed to retrieve the revision.
1996 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1996 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1997
1997
1998 if self.hassidedata:
1998 if self.hassidedata:
1999 if rev is None:
1999 if rev is None:
2000 rev = self.rev(node)
2000 rev = self.rev(node)
2001 sidedata = self._sidedata(rev)
2001 sidedata = self._sidedata(rev)
2002 else:
2002 else:
2003 sidedata = {}
2003 sidedata = {}
2004
2004
2005 if raw and validated:
2005 if raw and validated:
2006 # if we don't want to process the raw text and that raw
2006 # if we don't want to process the raw text and that raw
2007 # text is cached, we can exit early.
2007 # text is cached, we can exit early.
2008 return rawtext, sidedata
2008 return rawtext, sidedata
2009 if rev is None:
2009 if rev is None:
2010 rev = self.rev(node)
2010 rev = self.rev(node)
2011 # the revlog's flag for this revision
2011 # the revlog's flag for this revision
2012 # (usually alter its state or content)
2012 # (usually alter its state or content)
2013 flags = self.flags(rev)
2013 flags = self.flags(rev)
2014
2014
2015 if validated and flags == REVIDX_DEFAULT_FLAGS:
2015 if validated and flags == REVIDX_DEFAULT_FLAGS:
2016 # no extra flags set, no flag processor runs, text = rawtext
2016 # no extra flags set, no flag processor runs, text = rawtext
2017 return rawtext, sidedata
2017 return rawtext, sidedata
2018
2018
2019 if raw:
2019 if raw:
2020 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2020 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2021 text = rawtext
2021 text = rawtext
2022 else:
2022 else:
2023 r = flagutil.processflagsread(self, rawtext, flags)
2023 r = flagutil.processflagsread(self, rawtext, flags)
2024 text, validatehash = r
2024 text, validatehash = r
2025 if validatehash:
2025 if validatehash:
2026 self.checkhash(text, node, rev=rev)
2026 self.checkhash(text, node, rev=rev)
2027 if not validated:
2027 if not validated:
2028 self._revisioncache = (node, rev, rawtext)
2028 self._revisioncache = (node, rev, rawtext)
2029
2029
2030 return text, sidedata
2030 return text, sidedata
2031
2031
2032 def _rawtext(self, node, rev, _df=None):
2032 def _rawtext(self, node, rev, _df=None):
2033 """return the possibly unvalidated rawtext for a revision
2033 """return the possibly unvalidated rawtext for a revision
2034
2034
2035 returns (rev, rawtext, validated)
2035 returns (rev, rawtext, validated)
2036 """
2036 """
2037
2037
2038 # revision in the cache (could be useful to apply delta)
2038 # revision in the cache (could be useful to apply delta)
2039 cachedrev = None
2039 cachedrev = None
2040 # An intermediate text to apply deltas to
2040 # An intermediate text to apply deltas to
2041 basetext = None
2041 basetext = None
2042
2042
2043 # Check if we have the entry in cache
2043 # Check if we have the entry in cache
2044 # The cache entry looks like (node, rev, rawtext)
2044 # The cache entry looks like (node, rev, rawtext)
2045 if self._revisioncache:
2045 if self._revisioncache:
2046 if self._revisioncache[0] == node:
2046 if self._revisioncache[0] == node:
2047 return (rev, self._revisioncache[2], True)
2047 return (rev, self._revisioncache[2], True)
2048 cachedrev = self._revisioncache[1]
2048 cachedrev = self._revisioncache[1]
2049
2049
2050 if rev is None:
2050 if rev is None:
2051 rev = self.rev(node)
2051 rev = self.rev(node)
2052
2052
2053 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2053 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2054 if stopped:
2054 if stopped:
2055 basetext = self._revisioncache[2]
2055 basetext = self._revisioncache[2]
2056
2056
2057 # drop cache to save memory, the caller is expected to
2057 # drop cache to save memory, the caller is expected to
2058 # update self._revisioncache after validating the text
2058 # update self._revisioncache after validating the text
2059 self._revisioncache = None
2059 self._revisioncache = None
2060
2060
2061 targetsize = None
2061 targetsize = None
2062 rawsize = self.index[rev][2]
2062 rawsize = self.index[rev][2]
2063 if 0 <= rawsize:
2063 if 0 <= rawsize:
2064 targetsize = 4 * rawsize
2064 targetsize = 4 * rawsize
2065
2065
2066 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2066 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2067 if basetext is None:
2067 if basetext is None:
2068 basetext = bytes(bins[0])
2068 basetext = bytes(bins[0])
2069 bins = bins[1:]
2069 bins = bins[1:]
2070
2070
2071 rawtext = mdiff.patches(basetext, bins)
2071 rawtext = mdiff.patches(basetext, bins)
2072 del basetext # let us have a chance to free memory early
2072 del basetext # let us have a chance to free memory early
2073 return (rev, rawtext, False)
2073 return (rev, rawtext, False)
2074
2074
2075 def _sidedata(self, rev):
2075 def _sidedata(self, rev):
2076 """Return the sidedata for a given revision number."""
2076 """Return the sidedata for a given revision number."""
2077 index_entry = self.index[rev]
2077 index_entry = self.index[rev]
2078 sidedata_offset = index_entry[8]
2078 sidedata_offset = index_entry[8]
2079 sidedata_size = index_entry[9]
2079 sidedata_size = index_entry[9]
2080
2080
2081 if self._inline:
2081 if self._inline:
2082 sidedata_offset += self.index.entry_size * (1 + rev)
2082 sidedata_offset += self.index.entry_size * (1 + rev)
2083 if sidedata_size == 0:
2083 if sidedata_size == 0:
2084 return {}
2084 return {}
2085
2085
2086 comp_segment = self._getsegment(sidedata_offset, sidedata_size)
2086 comp_segment = self._getsegment(sidedata_offset, sidedata_size)
2087 comp = self.index[rev][11]
2087 comp = self.index[rev][11]
2088 if comp == COMP_MODE_PLAIN:
2088 if comp == COMP_MODE_PLAIN:
2089 segment = comp_segment
2089 segment = comp_segment
2090 elif comp == COMP_MODE_DEFAULT:
2090 elif comp == COMP_MODE_DEFAULT:
2091 segment = self._decompressor(comp_segment)
2091 segment = self._decompressor(comp_segment)
2092 elif comp == COMP_MODE_INLINE:
2092 elif comp == COMP_MODE_INLINE:
2093 segment = self.decompress(comp_segment)
2093 segment = self.decompress(comp_segment)
2094 else:
2094 else:
2095 msg = 'unknown compression mode %d'
2095 msg = 'unknown compression mode %d'
2096 msg %= comp
2096 msg %= comp
2097 raise error.RevlogError(msg)
2097 raise error.RevlogError(msg)
2098
2098
2099 sidedata = sidedatautil.deserialize_sidedata(segment)
2099 sidedata = sidedatautil.deserialize_sidedata(segment)
2100 return sidedata
2100 return sidedata
2101
2101
2102 def rawdata(self, nodeorrev, _df=None):
2102 def rawdata(self, nodeorrev, _df=None):
2103 """return an uncompressed raw data of a given node or revision number.
2103 """return an uncompressed raw data of a given node or revision number.
2104
2104
2105 _df - an existing file handle to read from. (internal-only)
2105 _df - an existing file handle to read from. (internal-only)
2106 """
2106 """
2107 return self._revisiondata(nodeorrev, _df, raw=True)[0]
2107 return self._revisiondata(nodeorrev, _df, raw=True)[0]
2108
2108
2109 def hash(self, text, p1, p2):
2109 def hash(self, text, p1, p2):
2110 """Compute a node hash.
2110 """Compute a node hash.
2111
2111
2112 Available as a function so that subclasses can replace the hash
2112 Available as a function so that subclasses can replace the hash
2113 as needed.
2113 as needed.
2114 """
2114 """
2115 return storageutil.hashrevisionsha1(text, p1, p2)
2115 return storageutil.hashrevisionsha1(text, p1, p2)
2116
2116
2117 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2117 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2118 """Check node hash integrity.
2118 """Check node hash integrity.
2119
2119
2120 Available as a function so that subclasses can extend hash mismatch
2120 Available as a function so that subclasses can extend hash mismatch
2121 behaviors as needed.
2121 behaviors as needed.
2122 """
2122 """
2123 try:
2123 try:
2124 if p1 is None and p2 is None:
2124 if p1 is None and p2 is None:
2125 p1, p2 = self.parents(node)
2125 p1, p2 = self.parents(node)
2126 if node != self.hash(text, p1, p2):
2126 if node != self.hash(text, p1, p2):
2127 # Clear the revision cache on hash failure. The revision cache
2127 # Clear the revision cache on hash failure. The revision cache
2128 # only stores the raw revision and clearing the cache does have
2128 # only stores the raw revision and clearing the cache does have
2129 # the side-effect that we won't have a cache hit when the raw
2129 # the side-effect that we won't have a cache hit when the raw
2130 # revision data is accessed. But this case should be rare and
2130 # revision data is accessed. But this case should be rare and
2131 # it is extra work to teach the cache about the hash
2131 # it is extra work to teach the cache about the hash
2132 # verification state.
2132 # verification state.
2133 if self._revisioncache and self._revisioncache[0] == node:
2133 if self._revisioncache and self._revisioncache[0] == node:
2134 self._revisioncache = None
2134 self._revisioncache = None
2135
2135
2136 revornode = rev
2136 revornode = rev
2137 if revornode is None:
2137 if revornode is None:
2138 revornode = templatefilters.short(hex(node))
2138 revornode = templatefilters.short(hex(node))
2139 raise error.RevlogError(
2139 raise error.RevlogError(
2140 _(b"integrity check failed on %s:%s")
2140 _(b"integrity check failed on %s:%s")
2141 % (self.display_id, pycompat.bytestr(revornode))
2141 % (self.display_id, pycompat.bytestr(revornode))
2142 )
2142 )
2143 except error.RevlogError:
2143 except error.RevlogError:
2144 if self._censorable and storageutil.iscensoredtext(text):
2144 if self._censorable and storageutil.iscensoredtext(text):
2145 raise error.CensoredNodeError(self.display_id, node, text)
2145 raise error.CensoredNodeError(self.display_id, node, text)
2146 raise
2146 raise
2147
2147
2148 def _enforceinlinesize(self, tr):
2148 def _enforceinlinesize(self, tr):
2149 """Check if the revlog is too big for inline and convert if so.
2149 """Check if the revlog is too big for inline and convert if so.
2150
2150
2151 This should be called after revisions are added to the revlog. If the
2151 This should be called after revisions are added to the revlog. If the
2152 revlog has grown too large to be an inline revlog, it will convert it
2152 revlog has grown too large to be an inline revlog, it will convert it
2153 to use multiple index and data files.
2153 to use multiple index and data files.
2154 """
2154 """
2155 tiprev = len(self) - 1
2155 tiprev = len(self) - 1
2156 total_size = self.start(tiprev) + self.length(tiprev)
2156 total_size = self.start(tiprev) + self.length(tiprev)
2157 if not self._inline or total_size < _maxinline:
2157 if not self._inline or total_size < _maxinline:
2158 return
2158 return
2159
2159
2160 troffset = tr.findoffset(self._indexfile)
2160 troffset = tr.findoffset(self._indexfile)
2161 if troffset is None:
2161 if troffset is None:
2162 raise error.RevlogError(
2162 raise error.RevlogError(
2163 _(b"%s not found in the transaction") % self._indexfile
2163 _(b"%s not found in the transaction") % self._indexfile
2164 )
2164 )
2165 trindex = 0
2165 trindex = 0
2166 tr.add(self._datafile, 0)
2166 tr.add(self._datafile, 0)
2167
2167
2168 existing_handles = False
2168 existing_handles = False
2169 if self._writinghandles is not None:
2169 if self._writinghandles is not None:
2170 existing_handles = True
2170 existing_handles = True
2171 fp = self._writinghandles[0]
2171 fp = self._writinghandles[0]
2172 fp.flush()
2172 fp.flush()
2173 fp.close()
2173 fp.close()
2174 # We can't use the cached file handle after close(). So prevent
2174 # We can't use the cached file handle after close(). So prevent
2175 # its usage.
2175 # its usage.
2176 self._writinghandles = None
2176 self._writinghandles = None
2177
2177
2178 new_dfh = self._datafp(b'w+')
2178 new_dfh = self._datafp(b'w+')
2179 new_dfh.truncate(0) # drop any potentially existing data
2179 new_dfh.truncate(0) # drop any potentially existing data
2180 try:
2180 try:
2181 with self._indexfp() as read_ifh:
2181 with self._indexfp() as read_ifh:
2182 for r in self:
2182 for r in self:
2183 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2183 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2184 if troffset <= self.start(r) + r * self.index.entry_size:
2184 if troffset <= self.start(r) + r * self.index.entry_size:
2185 trindex = r
2185 trindex = r
2186 new_dfh.flush()
2186 new_dfh.flush()
2187
2187
2188 with self.__index_new_fp() as fp:
2188 with self.__index_new_fp() as fp:
2189 self._format_flags &= ~FLAG_INLINE_DATA
2189 self._format_flags &= ~FLAG_INLINE_DATA
2190 self._inline = False
2190 self._inline = False
2191 for i in self:
2191 for i in self:
2192 e = self.index.entry_binary(i)
2192 e = self.index.entry_binary(i)
2193 if i == 0 and self._docket is None:
2193 if i == 0 and self._docket is None:
2194 header = self._format_flags | self._format_version
2194 header = self._format_flags | self._format_version
2195 header = self.index.pack_header(header)
2195 header = self.index.pack_header(header)
2196 e = header + e
2196 e = header + e
2197 fp.write(e)
2197 fp.write(e)
2198 if self._docket is not None:
2198 if self._docket is not None:
2199 self._docket.index_end = fp.tell()
2199 self._docket.index_end = fp.tell()
2200
2200
2201 # There is a small transactional race here. If the rename of
2201 # There is a small transactional race here. If the rename of
2202 # the index fails, we should remove the datafile. It is more
2202 # the index fails, we should remove the datafile. It is more
2203 # important to ensure that the data file is not truncated
2203 # important to ensure that the data file is not truncated
2204 # when the index is replaced as otherwise data is lost.
2204 # when the index is replaced as otherwise data is lost.
2205 tr.replace(self._datafile, self.start(trindex))
2205 tr.replace(self._datafile, self.start(trindex))
2206
2206
2207 # the temp file replace the real index when we exit the context
2207 # the temp file replace the real index when we exit the context
2208 # manager
2208 # manager
2209
2209
2210 tr.replace(self._indexfile, trindex * self.index.entry_size)
2210 tr.replace(self._indexfile, trindex * self.index.entry_size)
2211 nodemaputil.setup_persistent_nodemap(tr, self)
2211 nodemaputil.setup_persistent_nodemap(tr, self)
2212 self._chunkclear()
2212 self._chunkclear()
2213
2213
2214 if existing_handles:
2214 if existing_handles:
2215 # switched from inline to conventional reopen the index
2215 # switched from inline to conventional reopen the index
2216 ifh = self.__index_write_fp()
2216 ifh = self.__index_write_fp()
2217 self._writinghandles = (ifh, new_dfh)
2217 self._writinghandles = (ifh, new_dfh)
2218 new_dfh = None
2218 new_dfh = None
2219 finally:
2219 finally:
2220 if new_dfh is not None:
2220 if new_dfh is not None:
2221 new_dfh.close()
2221 new_dfh.close()
2222
2222
2223 def _nodeduplicatecallback(self, transaction, node):
2223 def _nodeduplicatecallback(self, transaction, node):
2224 """called when trying to add a node already stored."""
2224 """called when trying to add a node already stored."""
2225
2225
2226 @contextlib.contextmanager
2226 @contextlib.contextmanager
2227 def _writing(self, transaction):
2227 def _writing(self, transaction):
2228 if self._trypending:
2228 if self._trypending:
2229 msg = b'try to write in a `trypending` revlog: %s'
2229 msg = b'try to write in a `trypending` revlog: %s'
2230 msg %= self.display_id
2230 msg %= self.display_id
2231 raise error.ProgrammingError(msg)
2231 raise error.ProgrammingError(msg)
2232 if self._writinghandles is not None:
2232 if self._writinghandles is not None:
2233 yield
2233 yield
2234 else:
2234 else:
2235 r = len(self)
2235 ifh = dfh = None
2236 dsize = 0
2237 if r:
2238 dsize = self.end(r - 1)
2239 dfh = None
2240 if not self._inline:
2241 try:
2242 dfh = self._datafp(b"r+")
2243 if self._docket is None:
2244 dfh.seek(0, os.SEEK_END)
2245 else:
2246 dfh.seek(self._docket.data_end, os.SEEK_SET)
2247 except IOError as inst:
2248 if inst.errno != errno.ENOENT:
2249 raise
2250 dfh = self._datafp(b"w+")
2251 transaction.add(self._datafile, dsize)
2252 try:
2236 try:
2237 r = len(self)
2238 # opening the data file.
2239 dsize = 0
2240 if r:
2241 dsize = self.end(r - 1)
2242 dfh = None
2243 if not self._inline:
2244 try:
2245 dfh = self._datafp(b"r+")
2246 if self._docket is None:
2247 dfh.seek(0, os.SEEK_END)
2248 else:
2249 dfh.seek(self._docket.data_end, os.SEEK_SET)
2250 except IOError as inst:
2251 if inst.errno != errno.ENOENT:
2252 raise
2253 dfh = self._datafp(b"w+")
2254 transaction.add(self._datafile, dsize)
2255
2256 # opening the index file.
2253 isize = r * self.index.entry_size
2257 isize = r * self.index.entry_size
2254 ifh = self.__index_write_fp()
2258 ifh = self.__index_write_fp()
2255 if self._inline:
2259 if self._inline:
2256 transaction.add(self._indexfile, dsize + isize)
2260 transaction.add(self._indexfile, dsize + isize)
2257 else:
2261 else:
2258 transaction.add(self._indexfile, isize)
2262 transaction.add(self._indexfile, isize)
2259 try:
2263 # exposing all file handle for writing.
2260 self._writinghandles = (ifh, dfh)
2264 self._writinghandles = (ifh, dfh)
2261 try:
2265 yield
2262 yield
2266 if self._docket is not None:
2263 if self._docket is not None:
2267 self._write_docket(transaction)
2264 self._write_docket(transaction)
2268 finally:
2265 finally:
2269 self._writinghandles = None
2266 self._writinghandles = None
2270 if ifh is not None:
2267 finally:
2268 ifh.close()
2271 ifh.close()
2269 finally:
2270 if dfh is not None:
2272 if dfh is not None:
2271 dfh.close()
2273 dfh.close()
2272
2274
2273 def _write_docket(self, transaction):
2275 def _write_docket(self, transaction):
2274 """write the current docket on disk
2276 """write the current docket on disk
2275
2277
2276 Exist as a method to help changelog to implement transaction logic
2278 Exist as a method to help changelog to implement transaction logic
2277
2279
2278 We could also imagine using the same transaction logic for all revlog
2280 We could also imagine using the same transaction logic for all revlog
2279 since docket are cheap."""
2281 since docket are cheap."""
2280 self._docket.write(transaction)
2282 self._docket.write(transaction)
2281
2283
2282 def addrevision(
2284 def addrevision(
2283 self,
2285 self,
2284 text,
2286 text,
2285 transaction,
2287 transaction,
2286 link,
2288 link,
2287 p1,
2289 p1,
2288 p2,
2290 p2,
2289 cachedelta=None,
2291 cachedelta=None,
2290 node=None,
2292 node=None,
2291 flags=REVIDX_DEFAULT_FLAGS,
2293 flags=REVIDX_DEFAULT_FLAGS,
2292 deltacomputer=None,
2294 deltacomputer=None,
2293 sidedata=None,
2295 sidedata=None,
2294 ):
2296 ):
2295 """add a revision to the log
2297 """add a revision to the log
2296
2298
2297 text - the revision data to add
2299 text - the revision data to add
2298 transaction - the transaction object used for rollback
2300 transaction - the transaction object used for rollback
2299 link - the linkrev data to add
2301 link - the linkrev data to add
2300 p1, p2 - the parent nodeids of the revision
2302 p1, p2 - the parent nodeids of the revision
2301 cachedelta - an optional precomputed delta
2303 cachedelta - an optional precomputed delta
2302 node - nodeid of revision; typically node is not specified, and it is
2304 node - nodeid of revision; typically node is not specified, and it is
2303 computed by default as hash(text, p1, p2), however subclasses might
2305 computed by default as hash(text, p1, p2), however subclasses might
2304 use different hashing method (and override checkhash() in such case)
2306 use different hashing method (and override checkhash() in such case)
2305 flags - the known flags to set on the revision
2307 flags - the known flags to set on the revision
2306 deltacomputer - an optional deltacomputer instance shared between
2308 deltacomputer - an optional deltacomputer instance shared between
2307 multiple calls
2309 multiple calls
2308 """
2310 """
2309 if link == nullrev:
2311 if link == nullrev:
2310 raise error.RevlogError(
2312 raise error.RevlogError(
2311 _(b"attempted to add linkrev -1 to %s") % self.display_id
2313 _(b"attempted to add linkrev -1 to %s") % self.display_id
2312 )
2314 )
2313
2315
2314 if sidedata is None:
2316 if sidedata is None:
2315 sidedata = {}
2317 sidedata = {}
2316 elif sidedata and not self.hassidedata:
2318 elif sidedata and not self.hassidedata:
2317 raise error.ProgrammingError(
2319 raise error.ProgrammingError(
2318 _(b"trying to add sidedata to a revlog who don't support them")
2320 _(b"trying to add sidedata to a revlog who don't support them")
2319 )
2321 )
2320
2322
2321 if flags:
2323 if flags:
2322 node = node or self.hash(text, p1, p2)
2324 node = node or self.hash(text, p1, p2)
2323
2325
2324 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2326 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2325
2327
2326 # If the flag processor modifies the revision data, ignore any provided
2328 # If the flag processor modifies the revision data, ignore any provided
2327 # cachedelta.
2329 # cachedelta.
2328 if rawtext != text:
2330 if rawtext != text:
2329 cachedelta = None
2331 cachedelta = None
2330
2332
2331 if len(rawtext) > _maxentrysize:
2333 if len(rawtext) > _maxentrysize:
2332 raise error.RevlogError(
2334 raise error.RevlogError(
2333 _(
2335 _(
2334 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2336 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2335 )
2337 )
2336 % (self.display_id, len(rawtext))
2338 % (self.display_id, len(rawtext))
2337 )
2339 )
2338
2340
2339 node = node or self.hash(rawtext, p1, p2)
2341 node = node or self.hash(rawtext, p1, p2)
2340 rev = self.index.get_rev(node)
2342 rev = self.index.get_rev(node)
2341 if rev is not None:
2343 if rev is not None:
2342 return rev
2344 return rev
2343
2345
2344 if validatehash:
2346 if validatehash:
2345 self.checkhash(rawtext, node, p1=p1, p2=p2)
2347 self.checkhash(rawtext, node, p1=p1, p2=p2)
2346
2348
2347 return self.addrawrevision(
2349 return self.addrawrevision(
2348 rawtext,
2350 rawtext,
2349 transaction,
2351 transaction,
2350 link,
2352 link,
2351 p1,
2353 p1,
2352 p2,
2354 p2,
2353 node,
2355 node,
2354 flags,
2356 flags,
2355 cachedelta=cachedelta,
2357 cachedelta=cachedelta,
2356 deltacomputer=deltacomputer,
2358 deltacomputer=deltacomputer,
2357 sidedata=sidedata,
2359 sidedata=sidedata,
2358 )
2360 )
2359
2361
2360 def addrawrevision(
2362 def addrawrevision(
2361 self,
2363 self,
2362 rawtext,
2364 rawtext,
2363 transaction,
2365 transaction,
2364 link,
2366 link,
2365 p1,
2367 p1,
2366 p2,
2368 p2,
2367 node,
2369 node,
2368 flags,
2370 flags,
2369 cachedelta=None,
2371 cachedelta=None,
2370 deltacomputer=None,
2372 deltacomputer=None,
2371 sidedata=None,
2373 sidedata=None,
2372 ):
2374 ):
2373 """add a raw revision with known flags, node and parents
2375 """add a raw revision with known flags, node and parents
2374 useful when reusing a revision not stored in this revlog (ex: received
2376 useful when reusing a revision not stored in this revlog (ex: received
2375 over wire, or read from an external bundle).
2377 over wire, or read from an external bundle).
2376 """
2378 """
2377 with self._writing(transaction):
2379 with self._writing(transaction):
2378 return self._addrevision(
2380 return self._addrevision(
2379 node,
2381 node,
2380 rawtext,
2382 rawtext,
2381 transaction,
2383 transaction,
2382 link,
2384 link,
2383 p1,
2385 p1,
2384 p2,
2386 p2,
2385 flags,
2387 flags,
2386 cachedelta,
2388 cachedelta,
2387 deltacomputer=deltacomputer,
2389 deltacomputer=deltacomputer,
2388 sidedata=sidedata,
2390 sidedata=sidedata,
2389 )
2391 )
2390
2392
2391 def compress(self, data):
2393 def compress(self, data):
2392 """Generate a possibly-compressed representation of data."""
2394 """Generate a possibly-compressed representation of data."""
2393 if not data:
2395 if not data:
2394 return b'', data
2396 return b'', data
2395
2397
2396 compressed = self._compressor.compress(data)
2398 compressed = self._compressor.compress(data)
2397
2399
2398 if compressed:
2400 if compressed:
2399 # The revlog compressor added the header in the returned data.
2401 # The revlog compressor added the header in the returned data.
2400 return b'', compressed
2402 return b'', compressed
2401
2403
2402 if data[0:1] == b'\0':
2404 if data[0:1] == b'\0':
2403 return b'', data
2405 return b'', data
2404 return b'u', data
2406 return b'u', data
2405
2407
2406 def decompress(self, data):
2408 def decompress(self, data):
2407 """Decompress a revlog chunk.
2409 """Decompress a revlog chunk.
2408
2410
2409 The chunk is expected to begin with a header identifying the
2411 The chunk is expected to begin with a header identifying the
2410 format type so it can be routed to an appropriate decompressor.
2412 format type so it can be routed to an appropriate decompressor.
2411 """
2413 """
2412 if not data:
2414 if not data:
2413 return data
2415 return data
2414
2416
2415 # Revlogs are read much more frequently than they are written and many
2417 # Revlogs are read much more frequently than they are written and many
2416 # chunks only take microseconds to decompress, so performance is
2418 # chunks only take microseconds to decompress, so performance is
2417 # important here.
2419 # important here.
2418 #
2420 #
2419 # We can make a few assumptions about revlogs:
2421 # We can make a few assumptions about revlogs:
2420 #
2422 #
2421 # 1) the majority of chunks will be compressed (as opposed to inline
2423 # 1) the majority of chunks will be compressed (as opposed to inline
2422 # raw data).
2424 # raw data).
2423 # 2) decompressing *any* data will likely by at least 10x slower than
2425 # 2) decompressing *any* data will likely by at least 10x slower than
2424 # returning raw inline data.
2426 # returning raw inline data.
2425 # 3) we want to prioritize common and officially supported compression
2427 # 3) we want to prioritize common and officially supported compression
2426 # engines
2428 # engines
2427 #
2429 #
2428 # It follows that we want to optimize for "decompress compressed data
2430 # It follows that we want to optimize for "decompress compressed data
2429 # when encoded with common and officially supported compression engines"
2431 # when encoded with common and officially supported compression engines"
2430 # case over "raw data" and "data encoded by less common or non-official
2432 # case over "raw data" and "data encoded by less common or non-official
2431 # compression engines." That is why we have the inline lookup first
2433 # compression engines." That is why we have the inline lookup first
2432 # followed by the compengines lookup.
2434 # followed by the compengines lookup.
2433 #
2435 #
2434 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2436 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2435 # compressed chunks. And this matters for changelog and manifest reads.
2437 # compressed chunks. And this matters for changelog and manifest reads.
2436 t = data[0:1]
2438 t = data[0:1]
2437
2439
2438 if t == b'x':
2440 if t == b'x':
2439 try:
2441 try:
2440 return _zlibdecompress(data)
2442 return _zlibdecompress(data)
2441 except zlib.error as e:
2443 except zlib.error as e:
2442 raise error.RevlogError(
2444 raise error.RevlogError(
2443 _(b'revlog decompress error: %s')
2445 _(b'revlog decompress error: %s')
2444 % stringutil.forcebytestr(e)
2446 % stringutil.forcebytestr(e)
2445 )
2447 )
2446 # '\0' is more common than 'u' so it goes first.
2448 # '\0' is more common than 'u' so it goes first.
2447 elif t == b'\0':
2449 elif t == b'\0':
2448 return data
2450 return data
2449 elif t == b'u':
2451 elif t == b'u':
2450 return util.buffer(data, 1)
2452 return util.buffer(data, 1)
2451
2453
2452 compressor = self._get_decompressor(t)
2454 compressor = self._get_decompressor(t)
2453
2455
2454 return compressor.decompress(data)
2456 return compressor.decompress(data)
2455
2457
2456 def _addrevision(
2458 def _addrevision(
2457 self,
2459 self,
2458 node,
2460 node,
2459 rawtext,
2461 rawtext,
2460 transaction,
2462 transaction,
2461 link,
2463 link,
2462 p1,
2464 p1,
2463 p2,
2465 p2,
2464 flags,
2466 flags,
2465 cachedelta,
2467 cachedelta,
2466 alwayscache=False,
2468 alwayscache=False,
2467 deltacomputer=None,
2469 deltacomputer=None,
2468 sidedata=None,
2470 sidedata=None,
2469 ):
2471 ):
2470 """internal function to add revisions to the log
2472 """internal function to add revisions to the log
2471
2473
2472 see addrevision for argument descriptions.
2474 see addrevision for argument descriptions.
2473
2475
2474 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2476 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2475
2477
2476 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2478 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2477 be used.
2479 be used.
2478
2480
2479 invariants:
2481 invariants:
2480 - rawtext is optional (can be None); if not set, cachedelta must be set.
2482 - rawtext is optional (can be None); if not set, cachedelta must be set.
2481 if both are set, they must correspond to each other.
2483 if both are set, they must correspond to each other.
2482 """
2484 """
2483 if node == self.nullid:
2485 if node == self.nullid:
2484 raise error.RevlogError(
2486 raise error.RevlogError(
2485 _(b"%s: attempt to add null revision") % self.display_id
2487 _(b"%s: attempt to add null revision") % self.display_id
2486 )
2488 )
2487 if (
2489 if (
2488 node == self.nodeconstants.wdirid
2490 node == self.nodeconstants.wdirid
2489 or node in self.nodeconstants.wdirfilenodeids
2491 or node in self.nodeconstants.wdirfilenodeids
2490 ):
2492 ):
2491 raise error.RevlogError(
2493 raise error.RevlogError(
2492 _(b"%s: attempt to add wdir revision") % self.display_id
2494 _(b"%s: attempt to add wdir revision") % self.display_id
2493 )
2495 )
2494 if self._writinghandles is None:
2496 if self._writinghandles is None:
2495 msg = b'adding revision outside `revlog._writing` context'
2497 msg = b'adding revision outside `revlog._writing` context'
2496 raise error.ProgrammingError(msg)
2498 raise error.ProgrammingError(msg)
2497
2499
2498 if self._inline:
2500 if self._inline:
2499 fh = self._writinghandles[0]
2501 fh = self._writinghandles[0]
2500 else:
2502 else:
2501 fh = self._writinghandles[1]
2503 fh = self._writinghandles[1]
2502
2504
2503 btext = [rawtext]
2505 btext = [rawtext]
2504
2506
2505 curr = len(self)
2507 curr = len(self)
2506 prev = curr - 1
2508 prev = curr - 1
2507
2509
2508 offset = self._get_data_offset(prev)
2510 offset = self._get_data_offset(prev)
2509
2511
2510 if self._concurrencychecker:
2512 if self._concurrencychecker:
2511 ifh, dfh = self._writinghandles
2513 ifh, dfh = self._writinghandles
2512 if self._inline:
2514 if self._inline:
2513 # offset is "as if" it were in the .d file, so we need to add on
2515 # offset is "as if" it were in the .d file, so we need to add on
2514 # the size of the entry metadata.
2516 # the size of the entry metadata.
2515 self._concurrencychecker(
2517 self._concurrencychecker(
2516 ifh, self._indexfile, offset + curr * self.index.entry_size
2518 ifh, self._indexfile, offset + curr * self.index.entry_size
2517 )
2519 )
2518 else:
2520 else:
2519 # Entries in the .i are a consistent size.
2521 # Entries in the .i are a consistent size.
2520 self._concurrencychecker(
2522 self._concurrencychecker(
2521 ifh, self._indexfile, curr * self.index.entry_size
2523 ifh, self._indexfile, curr * self.index.entry_size
2522 )
2524 )
2523 self._concurrencychecker(dfh, self._datafile, offset)
2525 self._concurrencychecker(dfh, self._datafile, offset)
2524
2526
2525 p1r, p2r = self.rev(p1), self.rev(p2)
2527 p1r, p2r = self.rev(p1), self.rev(p2)
2526
2528
2527 # full versions are inserted when the needed deltas
2529 # full versions are inserted when the needed deltas
2528 # become comparable to the uncompressed text
2530 # become comparable to the uncompressed text
2529 if rawtext is None:
2531 if rawtext is None:
2530 # need rawtext size, before changed by flag processors, which is
2532 # need rawtext size, before changed by flag processors, which is
2531 # the non-raw size. use revlog explicitly to avoid filelog's extra
2533 # the non-raw size. use revlog explicitly to avoid filelog's extra
2532 # logic that might remove metadata size.
2534 # logic that might remove metadata size.
2533 textlen = mdiff.patchedsize(
2535 textlen = mdiff.patchedsize(
2534 revlog.size(self, cachedelta[0]), cachedelta[1]
2536 revlog.size(self, cachedelta[0]), cachedelta[1]
2535 )
2537 )
2536 else:
2538 else:
2537 textlen = len(rawtext)
2539 textlen = len(rawtext)
2538
2540
2539 if deltacomputer is None:
2541 if deltacomputer is None:
2540 deltacomputer = deltautil.deltacomputer(self)
2542 deltacomputer = deltautil.deltacomputer(self)
2541
2543
2542 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2544 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2543
2545
2544 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2546 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2545
2547
2546 compression_mode = COMP_MODE_INLINE
2548 compression_mode = COMP_MODE_INLINE
2547 if self._docket is not None:
2549 if self._docket is not None:
2548 h, d = deltainfo.data
2550 h, d = deltainfo.data
2549 if not h and not d:
2551 if not h and not d:
2550 # not data to store at all... declare them uncompressed
2552 # not data to store at all... declare them uncompressed
2551 compression_mode = COMP_MODE_PLAIN
2553 compression_mode = COMP_MODE_PLAIN
2552 elif not h:
2554 elif not h:
2553 t = d[0:1]
2555 t = d[0:1]
2554 if t == b'\0':
2556 if t == b'\0':
2555 compression_mode = COMP_MODE_PLAIN
2557 compression_mode = COMP_MODE_PLAIN
2556 elif t == self._docket.default_compression_header:
2558 elif t == self._docket.default_compression_header:
2557 compression_mode = COMP_MODE_DEFAULT
2559 compression_mode = COMP_MODE_DEFAULT
2558 elif h == b'u':
2560 elif h == b'u':
2559 # we have a more efficient way to declare uncompressed
2561 # we have a more efficient way to declare uncompressed
2560 h = b''
2562 h = b''
2561 compression_mode = COMP_MODE_PLAIN
2563 compression_mode = COMP_MODE_PLAIN
2562 deltainfo = deltautil.drop_u_compression(deltainfo)
2564 deltainfo = deltautil.drop_u_compression(deltainfo)
2563
2565
2564 sidedata_compression_mode = COMP_MODE_INLINE
2566 sidedata_compression_mode = COMP_MODE_INLINE
2565 if sidedata and self.hassidedata:
2567 if sidedata and self.hassidedata:
2566 sidedata_compression_mode = COMP_MODE_PLAIN
2568 sidedata_compression_mode = COMP_MODE_PLAIN
2567 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2569 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2568 sidedata_offset = offset + deltainfo.deltalen
2570 sidedata_offset = offset + deltainfo.deltalen
2569 h, comp_sidedata = self.compress(serialized_sidedata)
2571 h, comp_sidedata = self.compress(serialized_sidedata)
2570 if (
2572 if (
2571 h != b'u'
2573 h != b'u'
2572 and comp_sidedata[0:1] != b'\0'
2574 and comp_sidedata[0:1] != b'\0'
2573 and len(comp_sidedata) < len(serialized_sidedata)
2575 and len(comp_sidedata) < len(serialized_sidedata)
2574 ):
2576 ):
2575 assert not h
2577 assert not h
2576 if (
2578 if (
2577 comp_sidedata[0:1]
2579 comp_sidedata[0:1]
2578 == self._docket.default_compression_header
2580 == self._docket.default_compression_header
2579 ):
2581 ):
2580 sidedata_compression_mode = COMP_MODE_DEFAULT
2582 sidedata_compression_mode = COMP_MODE_DEFAULT
2581 serialized_sidedata = comp_sidedata
2583 serialized_sidedata = comp_sidedata
2582 else:
2584 else:
2583 sidedata_compression_mode = COMP_MODE_INLINE
2585 sidedata_compression_mode = COMP_MODE_INLINE
2584 serialized_sidedata = comp_sidedata
2586 serialized_sidedata = comp_sidedata
2585 else:
2587 else:
2586 serialized_sidedata = b""
2588 serialized_sidedata = b""
2587 # Don't store the offset if the sidedata is empty, that way
2589 # Don't store the offset if the sidedata is empty, that way
2588 # we can easily detect empty sidedata and they will be no different
2590 # we can easily detect empty sidedata and they will be no different
2589 # than ones we manually add.
2591 # than ones we manually add.
2590 sidedata_offset = 0
2592 sidedata_offset = 0
2591
2593
2592 e = (
2594 e = (
2593 offset_type(offset, flags),
2595 offset_type(offset, flags),
2594 deltainfo.deltalen,
2596 deltainfo.deltalen,
2595 textlen,
2597 textlen,
2596 deltainfo.base,
2598 deltainfo.base,
2597 link,
2599 link,
2598 p1r,
2600 p1r,
2599 p2r,
2601 p2r,
2600 node,
2602 node,
2601 sidedata_offset,
2603 sidedata_offset,
2602 len(serialized_sidedata),
2604 len(serialized_sidedata),
2603 compression_mode,
2605 compression_mode,
2604 sidedata_compression_mode,
2606 sidedata_compression_mode,
2605 )
2607 )
2606
2608
2607 self.index.append(e)
2609 self.index.append(e)
2608 entry = self.index.entry_binary(curr)
2610 entry = self.index.entry_binary(curr)
2609 if curr == 0 and self._docket is None:
2611 if curr == 0 and self._docket is None:
2610 header = self._format_flags | self._format_version
2612 header = self._format_flags | self._format_version
2611 header = self.index.pack_header(header)
2613 header = self.index.pack_header(header)
2612 entry = header + entry
2614 entry = header + entry
2613 self._writeentry(
2615 self._writeentry(
2614 transaction,
2616 transaction,
2615 entry,
2617 entry,
2616 deltainfo.data,
2618 deltainfo.data,
2617 link,
2619 link,
2618 offset,
2620 offset,
2619 serialized_sidedata,
2621 serialized_sidedata,
2620 )
2622 )
2621
2623
2622 rawtext = btext[0]
2624 rawtext = btext[0]
2623
2625
2624 if alwayscache and rawtext is None:
2626 if alwayscache and rawtext is None:
2625 rawtext = deltacomputer.buildtext(revinfo, fh)
2627 rawtext = deltacomputer.buildtext(revinfo, fh)
2626
2628
2627 if type(rawtext) == bytes: # only accept immutable objects
2629 if type(rawtext) == bytes: # only accept immutable objects
2628 self._revisioncache = (node, curr, rawtext)
2630 self._revisioncache = (node, curr, rawtext)
2629 self._chainbasecache[curr] = deltainfo.chainbase
2631 self._chainbasecache[curr] = deltainfo.chainbase
2630 return curr
2632 return curr
2631
2633
2632 def _get_data_offset(self, prev):
2634 def _get_data_offset(self, prev):
2633 """Returns the current offset in the (in-transaction) data file.
2635 """Returns the current offset in the (in-transaction) data file.
2634 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
2636 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
2635 file to store that information: since sidedata can be rewritten to the
2637 file to store that information: since sidedata can be rewritten to the
2636 end of the data file within a transaction, you can have cases where, for
2638 end of the data file within a transaction, you can have cases where, for
2637 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2639 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2638 to `n - 1`'s sidedata being written after `n`'s data.
2640 to `n - 1`'s sidedata being written after `n`'s data.
2639
2641
2640 TODO cache this in a docket file before getting out of experimental."""
2642 TODO cache this in a docket file before getting out of experimental."""
2641 if self._docket is None:
2643 if self._docket is None:
2642 return self.end(prev)
2644 return self.end(prev)
2643 else:
2645 else:
2644 return self._docket.data_end
2646 return self._docket.data_end
2645
2647
2646 def _writeentry(self, transaction, entry, data, link, offset, sidedata):
2648 def _writeentry(self, transaction, entry, data, link, offset, sidedata):
2647 # Files opened in a+ mode have inconsistent behavior on various
2649 # Files opened in a+ mode have inconsistent behavior on various
2648 # platforms. Windows requires that a file positioning call be made
2650 # platforms. Windows requires that a file positioning call be made
2649 # when the file handle transitions between reads and writes. See
2651 # when the file handle transitions between reads and writes. See
2650 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2652 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2651 # platforms, Python or the platform itself can be buggy. Some versions
2653 # platforms, Python or the platform itself can be buggy. Some versions
2652 # of Solaris have been observed to not append at the end of the file
2654 # of Solaris have been observed to not append at the end of the file
2653 # if the file was seeked to before the end. See issue4943 for more.
2655 # if the file was seeked to before the end. See issue4943 for more.
2654 #
2656 #
2655 # We work around this issue by inserting a seek() before writing.
2657 # We work around this issue by inserting a seek() before writing.
2656 # Note: This is likely not necessary on Python 3. However, because
2658 # Note: This is likely not necessary on Python 3. However, because
2657 # the file handle is reused for reads and may be seeked there, we need
2659 # the file handle is reused for reads and may be seeked there, we need
2658 # to be careful before changing this.
2660 # to be careful before changing this.
2659 if self._writinghandles is None:
2661 if self._writinghandles is None:
2660 msg = b'adding revision outside `revlog._writing` context'
2662 msg = b'adding revision outside `revlog._writing` context'
2661 raise error.ProgrammingError(msg)
2663 raise error.ProgrammingError(msg)
2662 ifh, dfh = self._writinghandles
2664 ifh, dfh = self._writinghandles
2663 if self._docket is None:
2665 if self._docket is None:
2664 ifh.seek(0, os.SEEK_END)
2666 ifh.seek(0, os.SEEK_END)
2665 else:
2667 else:
2666 ifh.seek(self._docket.index_end, os.SEEK_SET)
2668 ifh.seek(self._docket.index_end, os.SEEK_SET)
2667 if dfh:
2669 if dfh:
2668 if self._docket is None:
2670 if self._docket is None:
2669 dfh.seek(0, os.SEEK_END)
2671 dfh.seek(0, os.SEEK_END)
2670 else:
2672 else:
2671 dfh.seek(self._docket.data_end, os.SEEK_SET)
2673 dfh.seek(self._docket.data_end, os.SEEK_SET)
2672
2674
2673 curr = len(self) - 1
2675 curr = len(self) - 1
2674 if not self._inline:
2676 if not self._inline:
2675 transaction.add(self._datafile, offset)
2677 transaction.add(self._datafile, offset)
2676 transaction.add(self._indexfile, curr * len(entry))
2678 transaction.add(self._indexfile, curr * len(entry))
2677 if data[0]:
2679 if data[0]:
2678 dfh.write(data[0])
2680 dfh.write(data[0])
2679 dfh.write(data[1])
2681 dfh.write(data[1])
2680 if sidedata:
2682 if sidedata:
2681 dfh.write(sidedata)
2683 dfh.write(sidedata)
2682 ifh.write(entry)
2684 ifh.write(entry)
2683 else:
2685 else:
2684 offset += curr * self.index.entry_size
2686 offset += curr * self.index.entry_size
2685 transaction.add(self._indexfile, offset)
2687 transaction.add(self._indexfile, offset)
2686 ifh.write(entry)
2688 ifh.write(entry)
2687 ifh.write(data[0])
2689 ifh.write(data[0])
2688 ifh.write(data[1])
2690 ifh.write(data[1])
2689 if sidedata:
2691 if sidedata:
2690 ifh.write(sidedata)
2692 ifh.write(sidedata)
2691 self._enforceinlinesize(transaction)
2693 self._enforceinlinesize(transaction)
2692 if self._docket is not None:
2694 if self._docket is not None:
2693 self._docket.index_end = self._writinghandles[0].tell()
2695 self._docket.index_end = self._writinghandles[0].tell()
2694 self._docket.data_end = self._writinghandles[1].tell()
2696 self._docket.data_end = self._writinghandles[1].tell()
2695
2697
2696 nodemaputil.setup_persistent_nodemap(transaction, self)
2698 nodemaputil.setup_persistent_nodemap(transaction, self)
2697
2699
2698 def addgroup(
2700 def addgroup(
2699 self,
2701 self,
2700 deltas,
2702 deltas,
2701 linkmapper,
2703 linkmapper,
2702 transaction,
2704 transaction,
2703 alwayscache=False,
2705 alwayscache=False,
2704 addrevisioncb=None,
2706 addrevisioncb=None,
2705 duplicaterevisioncb=None,
2707 duplicaterevisioncb=None,
2706 ):
2708 ):
2707 """
2709 """
2708 add a delta group
2710 add a delta group
2709
2711
2710 given a set of deltas, add them to the revision log. the
2712 given a set of deltas, add them to the revision log. the
2711 first delta is against its parent, which should be in our
2713 first delta is against its parent, which should be in our
2712 log, the rest are against the previous delta.
2714 log, the rest are against the previous delta.
2713
2715
2714 If ``addrevisioncb`` is defined, it will be called with arguments of
2716 If ``addrevisioncb`` is defined, it will be called with arguments of
2715 this revlog and the node that was added.
2717 this revlog and the node that was added.
2716 """
2718 """
2717
2719
2718 if self._adding_group:
2720 if self._adding_group:
2719 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2721 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2720
2722
2721 self._adding_group = True
2723 self._adding_group = True
2722 empty = True
2724 empty = True
2723 try:
2725 try:
2724 with self._writing(transaction):
2726 with self._writing(transaction):
2725 deltacomputer = deltautil.deltacomputer(self)
2727 deltacomputer = deltautil.deltacomputer(self)
2726 # loop through our set of deltas
2728 # loop through our set of deltas
2727 for data in deltas:
2729 for data in deltas:
2728 (
2730 (
2729 node,
2731 node,
2730 p1,
2732 p1,
2731 p2,
2733 p2,
2732 linknode,
2734 linknode,
2733 deltabase,
2735 deltabase,
2734 delta,
2736 delta,
2735 flags,
2737 flags,
2736 sidedata,
2738 sidedata,
2737 ) = data
2739 ) = data
2738 link = linkmapper(linknode)
2740 link = linkmapper(linknode)
2739 flags = flags or REVIDX_DEFAULT_FLAGS
2741 flags = flags or REVIDX_DEFAULT_FLAGS
2740
2742
2741 rev = self.index.get_rev(node)
2743 rev = self.index.get_rev(node)
2742 if rev is not None:
2744 if rev is not None:
2743 # this can happen if two branches make the same change
2745 # this can happen if two branches make the same change
2744 self._nodeduplicatecallback(transaction, rev)
2746 self._nodeduplicatecallback(transaction, rev)
2745 if duplicaterevisioncb:
2747 if duplicaterevisioncb:
2746 duplicaterevisioncb(self, rev)
2748 duplicaterevisioncb(self, rev)
2747 empty = False
2749 empty = False
2748 continue
2750 continue
2749
2751
2750 for p in (p1, p2):
2752 for p in (p1, p2):
2751 if not self.index.has_node(p):
2753 if not self.index.has_node(p):
2752 raise error.LookupError(
2754 raise error.LookupError(
2753 p, self.radix, _(b'unknown parent')
2755 p, self.radix, _(b'unknown parent')
2754 )
2756 )
2755
2757
2756 if not self.index.has_node(deltabase):
2758 if not self.index.has_node(deltabase):
2757 raise error.LookupError(
2759 raise error.LookupError(
2758 deltabase, self.display_id, _(b'unknown delta base')
2760 deltabase, self.display_id, _(b'unknown delta base')
2759 )
2761 )
2760
2762
2761 baserev = self.rev(deltabase)
2763 baserev = self.rev(deltabase)
2762
2764
2763 if baserev != nullrev and self.iscensored(baserev):
2765 if baserev != nullrev and self.iscensored(baserev):
2764 # if base is censored, delta must be full replacement in a
2766 # if base is censored, delta must be full replacement in a
2765 # single patch operation
2767 # single patch operation
2766 hlen = struct.calcsize(b">lll")
2768 hlen = struct.calcsize(b">lll")
2767 oldlen = self.rawsize(baserev)
2769 oldlen = self.rawsize(baserev)
2768 newlen = len(delta) - hlen
2770 newlen = len(delta) - hlen
2769 if delta[:hlen] != mdiff.replacediffheader(
2771 if delta[:hlen] != mdiff.replacediffheader(
2770 oldlen, newlen
2772 oldlen, newlen
2771 ):
2773 ):
2772 raise error.CensoredBaseError(
2774 raise error.CensoredBaseError(
2773 self.display_id, self.node(baserev)
2775 self.display_id, self.node(baserev)
2774 )
2776 )
2775
2777
2776 if not flags and self._peek_iscensored(baserev, delta):
2778 if not flags and self._peek_iscensored(baserev, delta):
2777 flags |= REVIDX_ISCENSORED
2779 flags |= REVIDX_ISCENSORED
2778
2780
2779 # We assume consumers of addrevisioncb will want to retrieve
2781 # We assume consumers of addrevisioncb will want to retrieve
2780 # the added revision, which will require a call to
2782 # the added revision, which will require a call to
2781 # revision(). revision() will fast path if there is a cache
2783 # revision(). revision() will fast path if there is a cache
2782 # hit. So, we tell _addrevision() to always cache in this case.
2784 # hit. So, we tell _addrevision() to always cache in this case.
2783 # We're only using addgroup() in the context of changegroup
2785 # We're only using addgroup() in the context of changegroup
2784 # generation so the revision data can always be handled as raw
2786 # generation so the revision data can always be handled as raw
2785 # by the flagprocessor.
2787 # by the flagprocessor.
2786 rev = self._addrevision(
2788 rev = self._addrevision(
2787 node,
2789 node,
2788 None,
2790 None,
2789 transaction,
2791 transaction,
2790 link,
2792 link,
2791 p1,
2793 p1,
2792 p2,
2794 p2,
2793 flags,
2795 flags,
2794 (baserev, delta),
2796 (baserev, delta),
2795 alwayscache=alwayscache,
2797 alwayscache=alwayscache,
2796 deltacomputer=deltacomputer,
2798 deltacomputer=deltacomputer,
2797 sidedata=sidedata,
2799 sidedata=sidedata,
2798 )
2800 )
2799
2801
2800 if addrevisioncb:
2802 if addrevisioncb:
2801 addrevisioncb(self, rev)
2803 addrevisioncb(self, rev)
2802 empty = False
2804 empty = False
2803 finally:
2805 finally:
2804 self._adding_group = False
2806 self._adding_group = False
2805 return not empty
2807 return not empty
2806
2808
2807 def iscensored(self, rev):
2809 def iscensored(self, rev):
2808 """Check if a file revision is censored."""
2810 """Check if a file revision is censored."""
2809 if not self._censorable:
2811 if not self._censorable:
2810 return False
2812 return False
2811
2813
2812 return self.flags(rev) & REVIDX_ISCENSORED
2814 return self.flags(rev) & REVIDX_ISCENSORED
2813
2815
2814 def _peek_iscensored(self, baserev, delta):
2816 def _peek_iscensored(self, baserev, delta):
2815 """Quickly check if a delta produces a censored revision."""
2817 """Quickly check if a delta produces a censored revision."""
2816 if not self._censorable:
2818 if not self._censorable:
2817 return False
2819 return False
2818
2820
2819 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2821 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2820
2822
2821 def getstrippoint(self, minlink):
2823 def getstrippoint(self, minlink):
2822 """find the minimum rev that must be stripped to strip the linkrev
2824 """find the minimum rev that must be stripped to strip the linkrev
2823
2825
2824 Returns a tuple containing the minimum rev and a set of all revs that
2826 Returns a tuple containing the minimum rev and a set of all revs that
2825 have linkrevs that will be broken by this strip.
2827 have linkrevs that will be broken by this strip.
2826 """
2828 """
2827 return storageutil.resolvestripinfo(
2829 return storageutil.resolvestripinfo(
2828 minlink,
2830 minlink,
2829 len(self) - 1,
2831 len(self) - 1,
2830 self.headrevs(),
2832 self.headrevs(),
2831 self.linkrev,
2833 self.linkrev,
2832 self.parentrevs,
2834 self.parentrevs,
2833 )
2835 )
2834
2836
2835 def strip(self, minlink, transaction):
2837 def strip(self, minlink, transaction):
2836 """truncate the revlog on the first revision with a linkrev >= minlink
2838 """truncate the revlog on the first revision with a linkrev >= minlink
2837
2839
2838 This function is called when we're stripping revision minlink and
2840 This function is called when we're stripping revision minlink and
2839 its descendants from the repository.
2841 its descendants from the repository.
2840
2842
2841 We have to remove all revisions with linkrev >= minlink, because
2843 We have to remove all revisions with linkrev >= minlink, because
2842 the equivalent changelog revisions will be renumbered after the
2844 the equivalent changelog revisions will be renumbered after the
2843 strip.
2845 strip.
2844
2846
2845 So we truncate the revlog on the first of these revisions, and
2847 So we truncate the revlog on the first of these revisions, and
2846 trust that the caller has saved the revisions that shouldn't be
2848 trust that the caller has saved the revisions that shouldn't be
2847 removed and that it'll re-add them after this truncation.
2849 removed and that it'll re-add them after this truncation.
2848 """
2850 """
2849 if len(self) == 0:
2851 if len(self) == 0:
2850 return
2852 return
2851
2853
2852 rev, _ = self.getstrippoint(minlink)
2854 rev, _ = self.getstrippoint(minlink)
2853 if rev == len(self):
2855 if rev == len(self):
2854 return
2856 return
2855
2857
2856 # first truncate the files on disk
2858 # first truncate the files on disk
2857 data_end = self.start(rev)
2859 data_end = self.start(rev)
2858 if not self._inline:
2860 if not self._inline:
2859 transaction.add(self._datafile, data_end)
2861 transaction.add(self._datafile, data_end)
2860 end = rev * self.index.entry_size
2862 end = rev * self.index.entry_size
2861 else:
2863 else:
2862 end = data_end + (rev * self.index.entry_size)
2864 end = data_end + (rev * self.index.entry_size)
2863
2865
2864 transaction.add(self._indexfile, end)
2866 transaction.add(self._indexfile, end)
2865 if self._docket is not None:
2867 if self._docket is not None:
2866 # XXX we could, leverage the docket while stripping. However it is
2868 # XXX we could, leverage the docket while stripping. However it is
2867 # not powerfull enough at the time of this comment
2869 # not powerfull enough at the time of this comment
2868 self._docket.index_end = end
2870 self._docket.index_end = end
2869 self._docket.data_end = data_end
2871 self._docket.data_end = data_end
2870 self._docket.write(transaction, stripping=True)
2872 self._docket.write(transaction, stripping=True)
2871
2873
2872 # then reset internal state in memory to forget those revisions
2874 # then reset internal state in memory to forget those revisions
2873 self._revisioncache = None
2875 self._revisioncache = None
2874 self._chaininfocache = util.lrucachedict(500)
2876 self._chaininfocache = util.lrucachedict(500)
2875 self._chunkclear()
2877 self._chunkclear()
2876
2878
2877 del self.index[rev:-1]
2879 del self.index[rev:-1]
2878
2880
2879 def checksize(self):
2881 def checksize(self):
2880 """Check size of index and data files
2882 """Check size of index and data files
2881
2883
2882 return a (dd, di) tuple.
2884 return a (dd, di) tuple.
2883 - dd: extra bytes for the "data" file
2885 - dd: extra bytes for the "data" file
2884 - di: extra bytes for the "index" file
2886 - di: extra bytes for the "index" file
2885
2887
2886 A healthy revlog will return (0, 0).
2888 A healthy revlog will return (0, 0).
2887 """
2889 """
2888 expected = 0
2890 expected = 0
2889 if len(self):
2891 if len(self):
2890 expected = max(0, self.end(len(self) - 1))
2892 expected = max(0, self.end(len(self) - 1))
2891
2893
2892 try:
2894 try:
2893 with self._datafp() as f:
2895 with self._datafp() as f:
2894 f.seek(0, io.SEEK_END)
2896 f.seek(0, io.SEEK_END)
2895 actual = f.tell()
2897 actual = f.tell()
2896 dd = actual - expected
2898 dd = actual - expected
2897 except IOError as inst:
2899 except IOError as inst:
2898 if inst.errno != errno.ENOENT:
2900 if inst.errno != errno.ENOENT:
2899 raise
2901 raise
2900 dd = 0
2902 dd = 0
2901
2903
2902 try:
2904 try:
2903 f = self.opener(self._indexfile)
2905 f = self.opener(self._indexfile)
2904 f.seek(0, io.SEEK_END)
2906 f.seek(0, io.SEEK_END)
2905 actual = f.tell()
2907 actual = f.tell()
2906 f.close()
2908 f.close()
2907 s = self.index.entry_size
2909 s = self.index.entry_size
2908 i = max(0, actual // s)
2910 i = max(0, actual // s)
2909 di = actual - (i * s)
2911 di = actual - (i * s)
2910 if self._inline:
2912 if self._inline:
2911 databytes = 0
2913 databytes = 0
2912 for r in self:
2914 for r in self:
2913 databytes += max(0, self.length(r))
2915 databytes += max(0, self.length(r))
2914 dd = 0
2916 dd = 0
2915 di = actual - len(self) * s - databytes
2917 di = actual - len(self) * s - databytes
2916 except IOError as inst:
2918 except IOError as inst:
2917 if inst.errno != errno.ENOENT:
2919 if inst.errno != errno.ENOENT:
2918 raise
2920 raise
2919 di = 0
2921 di = 0
2920
2922
2921 return (dd, di)
2923 return (dd, di)
2922
2924
2923 def files(self):
2925 def files(self):
2924 res = [self._indexfile]
2926 res = [self._indexfile]
2925 if not self._inline:
2927 if not self._inline:
2926 res.append(self._datafile)
2928 res.append(self._datafile)
2927 return res
2929 return res
2928
2930
2929 def emitrevisions(
2931 def emitrevisions(
2930 self,
2932 self,
2931 nodes,
2933 nodes,
2932 nodesorder=None,
2934 nodesorder=None,
2933 revisiondata=False,
2935 revisiondata=False,
2934 assumehaveparentrevisions=False,
2936 assumehaveparentrevisions=False,
2935 deltamode=repository.CG_DELTAMODE_STD,
2937 deltamode=repository.CG_DELTAMODE_STD,
2936 sidedata_helpers=None,
2938 sidedata_helpers=None,
2937 ):
2939 ):
2938 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2940 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2939 raise error.ProgrammingError(
2941 raise error.ProgrammingError(
2940 b'unhandled value for nodesorder: %s' % nodesorder
2942 b'unhandled value for nodesorder: %s' % nodesorder
2941 )
2943 )
2942
2944
2943 if nodesorder is None and not self._generaldelta:
2945 if nodesorder is None and not self._generaldelta:
2944 nodesorder = b'storage'
2946 nodesorder = b'storage'
2945
2947
2946 if (
2948 if (
2947 not self._storedeltachains
2949 not self._storedeltachains
2948 and deltamode != repository.CG_DELTAMODE_PREV
2950 and deltamode != repository.CG_DELTAMODE_PREV
2949 ):
2951 ):
2950 deltamode = repository.CG_DELTAMODE_FULL
2952 deltamode = repository.CG_DELTAMODE_FULL
2951
2953
2952 return storageutil.emitrevisions(
2954 return storageutil.emitrevisions(
2953 self,
2955 self,
2954 nodes,
2956 nodes,
2955 nodesorder,
2957 nodesorder,
2956 revlogrevisiondelta,
2958 revlogrevisiondelta,
2957 deltaparentfn=self.deltaparent,
2959 deltaparentfn=self.deltaparent,
2958 candeltafn=self.candelta,
2960 candeltafn=self.candelta,
2959 rawsizefn=self.rawsize,
2961 rawsizefn=self.rawsize,
2960 revdifffn=self.revdiff,
2962 revdifffn=self.revdiff,
2961 flagsfn=self.flags,
2963 flagsfn=self.flags,
2962 deltamode=deltamode,
2964 deltamode=deltamode,
2963 revisiondata=revisiondata,
2965 revisiondata=revisiondata,
2964 assumehaveparentrevisions=assumehaveparentrevisions,
2966 assumehaveparentrevisions=assumehaveparentrevisions,
2965 sidedata_helpers=sidedata_helpers,
2967 sidedata_helpers=sidedata_helpers,
2966 )
2968 )
2967
2969
2968 DELTAREUSEALWAYS = b'always'
2970 DELTAREUSEALWAYS = b'always'
2969 DELTAREUSESAMEREVS = b'samerevs'
2971 DELTAREUSESAMEREVS = b'samerevs'
2970 DELTAREUSENEVER = b'never'
2972 DELTAREUSENEVER = b'never'
2971
2973
2972 DELTAREUSEFULLADD = b'fulladd'
2974 DELTAREUSEFULLADD = b'fulladd'
2973
2975
2974 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2976 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2975
2977
2976 def clone(
2978 def clone(
2977 self,
2979 self,
2978 tr,
2980 tr,
2979 destrevlog,
2981 destrevlog,
2980 addrevisioncb=None,
2982 addrevisioncb=None,
2981 deltareuse=DELTAREUSESAMEREVS,
2983 deltareuse=DELTAREUSESAMEREVS,
2982 forcedeltabothparents=None,
2984 forcedeltabothparents=None,
2983 sidedata_helpers=None,
2985 sidedata_helpers=None,
2984 ):
2986 ):
2985 """Copy this revlog to another, possibly with format changes.
2987 """Copy this revlog to another, possibly with format changes.
2986
2988
2987 The destination revlog will contain the same revisions and nodes.
2989 The destination revlog will contain the same revisions and nodes.
2988 However, it may not be bit-for-bit identical due to e.g. delta encoding
2990 However, it may not be bit-for-bit identical due to e.g. delta encoding
2989 differences.
2991 differences.
2990
2992
2991 The ``deltareuse`` argument control how deltas from the existing revlog
2993 The ``deltareuse`` argument control how deltas from the existing revlog
2992 are preserved in the destination revlog. The argument can have the
2994 are preserved in the destination revlog. The argument can have the
2993 following values:
2995 following values:
2994
2996
2995 DELTAREUSEALWAYS
2997 DELTAREUSEALWAYS
2996 Deltas will always be reused (if possible), even if the destination
2998 Deltas will always be reused (if possible), even if the destination
2997 revlog would not select the same revisions for the delta. This is the
2999 revlog would not select the same revisions for the delta. This is the
2998 fastest mode of operation.
3000 fastest mode of operation.
2999 DELTAREUSESAMEREVS
3001 DELTAREUSESAMEREVS
3000 Deltas will be reused if the destination revlog would pick the same
3002 Deltas will be reused if the destination revlog would pick the same
3001 revisions for the delta. This mode strikes a balance between speed
3003 revisions for the delta. This mode strikes a balance between speed
3002 and optimization.
3004 and optimization.
3003 DELTAREUSENEVER
3005 DELTAREUSENEVER
3004 Deltas will never be reused. This is the slowest mode of execution.
3006 Deltas will never be reused. This is the slowest mode of execution.
3005 This mode can be used to recompute deltas (e.g. if the diff/delta
3007 This mode can be used to recompute deltas (e.g. if the diff/delta
3006 algorithm changes).
3008 algorithm changes).
3007 DELTAREUSEFULLADD
3009 DELTAREUSEFULLADD
3008 Revision will be re-added as if their were new content. This is
3010 Revision will be re-added as if their were new content. This is
3009 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
3011 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
3010 eg: large file detection and handling.
3012 eg: large file detection and handling.
3011
3013
3012 Delta computation can be slow, so the choice of delta reuse policy can
3014 Delta computation can be slow, so the choice of delta reuse policy can
3013 significantly affect run time.
3015 significantly affect run time.
3014
3016
3015 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3017 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3016 two extremes. Deltas will be reused if they are appropriate. But if the
3018 two extremes. Deltas will be reused if they are appropriate. But if the
3017 delta could choose a better revision, it will do so. This means if you
3019 delta could choose a better revision, it will do so. This means if you
3018 are converting a non-generaldelta revlog to a generaldelta revlog,
3020 are converting a non-generaldelta revlog to a generaldelta revlog,
3019 deltas will be recomputed if the delta's parent isn't a parent of the
3021 deltas will be recomputed if the delta's parent isn't a parent of the
3020 revision.
3022 revision.
3021
3023
3022 In addition to the delta policy, the ``forcedeltabothparents``
3024 In addition to the delta policy, the ``forcedeltabothparents``
3023 argument controls whether to force compute deltas against both parents
3025 argument controls whether to force compute deltas against both parents
3024 for merges. By default, the current default is used.
3026 for merges. By default, the current default is used.
3025
3027
3026 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3028 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3027 `sidedata_helpers`.
3029 `sidedata_helpers`.
3028 """
3030 """
3029 if deltareuse not in self.DELTAREUSEALL:
3031 if deltareuse not in self.DELTAREUSEALL:
3030 raise ValueError(
3032 raise ValueError(
3031 _(b'value for deltareuse invalid: %s') % deltareuse
3033 _(b'value for deltareuse invalid: %s') % deltareuse
3032 )
3034 )
3033
3035
3034 if len(destrevlog):
3036 if len(destrevlog):
3035 raise ValueError(_(b'destination revlog is not empty'))
3037 raise ValueError(_(b'destination revlog is not empty'))
3036
3038
3037 if getattr(self, 'filteredrevs', None):
3039 if getattr(self, 'filteredrevs', None):
3038 raise ValueError(_(b'source revlog has filtered revisions'))
3040 raise ValueError(_(b'source revlog has filtered revisions'))
3039 if getattr(destrevlog, 'filteredrevs', None):
3041 if getattr(destrevlog, 'filteredrevs', None):
3040 raise ValueError(_(b'destination revlog has filtered revisions'))
3042 raise ValueError(_(b'destination revlog has filtered revisions'))
3041
3043
3042 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3044 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3043 # if possible.
3045 # if possible.
3044 oldlazydelta = destrevlog._lazydelta
3046 oldlazydelta = destrevlog._lazydelta
3045 oldlazydeltabase = destrevlog._lazydeltabase
3047 oldlazydeltabase = destrevlog._lazydeltabase
3046 oldamd = destrevlog._deltabothparents
3048 oldamd = destrevlog._deltabothparents
3047
3049
3048 try:
3050 try:
3049 if deltareuse == self.DELTAREUSEALWAYS:
3051 if deltareuse == self.DELTAREUSEALWAYS:
3050 destrevlog._lazydeltabase = True
3052 destrevlog._lazydeltabase = True
3051 destrevlog._lazydelta = True
3053 destrevlog._lazydelta = True
3052 elif deltareuse == self.DELTAREUSESAMEREVS:
3054 elif deltareuse == self.DELTAREUSESAMEREVS:
3053 destrevlog._lazydeltabase = False
3055 destrevlog._lazydeltabase = False
3054 destrevlog._lazydelta = True
3056 destrevlog._lazydelta = True
3055 elif deltareuse == self.DELTAREUSENEVER:
3057 elif deltareuse == self.DELTAREUSENEVER:
3056 destrevlog._lazydeltabase = False
3058 destrevlog._lazydeltabase = False
3057 destrevlog._lazydelta = False
3059 destrevlog._lazydelta = False
3058
3060
3059 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3061 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3060
3062
3061 self._clone(
3063 self._clone(
3062 tr,
3064 tr,
3063 destrevlog,
3065 destrevlog,
3064 addrevisioncb,
3066 addrevisioncb,
3065 deltareuse,
3067 deltareuse,
3066 forcedeltabothparents,
3068 forcedeltabothparents,
3067 sidedata_helpers,
3069 sidedata_helpers,
3068 )
3070 )
3069
3071
3070 finally:
3072 finally:
3071 destrevlog._lazydelta = oldlazydelta
3073 destrevlog._lazydelta = oldlazydelta
3072 destrevlog._lazydeltabase = oldlazydeltabase
3074 destrevlog._lazydeltabase = oldlazydeltabase
3073 destrevlog._deltabothparents = oldamd
3075 destrevlog._deltabothparents = oldamd
3074
3076
3075 def _clone(
3077 def _clone(
3076 self,
3078 self,
3077 tr,
3079 tr,
3078 destrevlog,
3080 destrevlog,
3079 addrevisioncb,
3081 addrevisioncb,
3080 deltareuse,
3082 deltareuse,
3081 forcedeltabothparents,
3083 forcedeltabothparents,
3082 sidedata_helpers,
3084 sidedata_helpers,
3083 ):
3085 ):
3084 """perform the core duty of `revlog.clone` after parameter processing"""
3086 """perform the core duty of `revlog.clone` after parameter processing"""
3085 deltacomputer = deltautil.deltacomputer(destrevlog)
3087 deltacomputer = deltautil.deltacomputer(destrevlog)
3086 index = self.index
3088 index = self.index
3087 for rev in self:
3089 for rev in self:
3088 entry = index[rev]
3090 entry = index[rev]
3089
3091
3090 # Some classes override linkrev to take filtered revs into
3092 # Some classes override linkrev to take filtered revs into
3091 # account. Use raw entry from index.
3093 # account. Use raw entry from index.
3092 flags = entry[0] & 0xFFFF
3094 flags = entry[0] & 0xFFFF
3093 linkrev = entry[4]
3095 linkrev = entry[4]
3094 p1 = index[entry[5]][7]
3096 p1 = index[entry[5]][7]
3095 p2 = index[entry[6]][7]
3097 p2 = index[entry[6]][7]
3096 node = entry[7]
3098 node = entry[7]
3097
3099
3098 # (Possibly) reuse the delta from the revlog if allowed and
3100 # (Possibly) reuse the delta from the revlog if allowed and
3099 # the revlog chunk is a delta.
3101 # the revlog chunk is a delta.
3100 cachedelta = None
3102 cachedelta = None
3101 rawtext = None
3103 rawtext = None
3102 if deltareuse == self.DELTAREUSEFULLADD:
3104 if deltareuse == self.DELTAREUSEFULLADD:
3103 text, sidedata = self._revisiondata(rev)
3105 text, sidedata = self._revisiondata(rev)
3104
3106
3105 if sidedata_helpers is not None:
3107 if sidedata_helpers is not None:
3106 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3108 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3107 self, sidedata_helpers, sidedata, rev
3109 self, sidedata_helpers, sidedata, rev
3108 )
3110 )
3109 flags = flags | new_flags[0] & ~new_flags[1]
3111 flags = flags | new_flags[0] & ~new_flags[1]
3110
3112
3111 destrevlog.addrevision(
3113 destrevlog.addrevision(
3112 text,
3114 text,
3113 tr,
3115 tr,
3114 linkrev,
3116 linkrev,
3115 p1,
3117 p1,
3116 p2,
3118 p2,
3117 cachedelta=cachedelta,
3119 cachedelta=cachedelta,
3118 node=node,
3120 node=node,
3119 flags=flags,
3121 flags=flags,
3120 deltacomputer=deltacomputer,
3122 deltacomputer=deltacomputer,
3121 sidedata=sidedata,
3123 sidedata=sidedata,
3122 )
3124 )
3123 else:
3125 else:
3124 if destrevlog._lazydelta:
3126 if destrevlog._lazydelta:
3125 dp = self.deltaparent(rev)
3127 dp = self.deltaparent(rev)
3126 if dp != nullrev:
3128 if dp != nullrev:
3127 cachedelta = (dp, bytes(self._chunk(rev)))
3129 cachedelta = (dp, bytes(self._chunk(rev)))
3128
3130
3129 sidedata = None
3131 sidedata = None
3130 if not cachedelta:
3132 if not cachedelta:
3131 rawtext, sidedata = self._revisiondata(rev)
3133 rawtext, sidedata = self._revisiondata(rev)
3132 if sidedata is None:
3134 if sidedata is None:
3133 sidedata = self.sidedata(rev)
3135 sidedata = self.sidedata(rev)
3134
3136
3135 if sidedata_helpers is not None:
3137 if sidedata_helpers is not None:
3136 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3138 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3137 self, sidedata_helpers, sidedata, rev
3139 self, sidedata_helpers, sidedata, rev
3138 )
3140 )
3139 flags = flags | new_flags[0] & ~new_flags[1]
3141 flags = flags | new_flags[0] & ~new_flags[1]
3140
3142
3141 with destrevlog._writing(tr):
3143 with destrevlog._writing(tr):
3142 destrevlog._addrevision(
3144 destrevlog._addrevision(
3143 node,
3145 node,
3144 rawtext,
3146 rawtext,
3145 tr,
3147 tr,
3146 linkrev,
3148 linkrev,
3147 p1,
3149 p1,
3148 p2,
3150 p2,
3149 flags,
3151 flags,
3150 cachedelta,
3152 cachedelta,
3151 deltacomputer=deltacomputer,
3153 deltacomputer=deltacomputer,
3152 sidedata=sidedata,
3154 sidedata=sidedata,
3153 )
3155 )
3154
3156
3155 if addrevisioncb:
3157 if addrevisioncb:
3156 addrevisioncb(self, rev, node)
3158 addrevisioncb(self, rev, node)
3157
3159
3158 def censorrevision(self, tr, censornode, tombstone=b''):
3160 def censorrevision(self, tr, censornode, tombstone=b''):
3159 if self._format_version == REVLOGV0:
3161 if self._format_version == REVLOGV0:
3160 raise error.RevlogError(
3162 raise error.RevlogError(
3161 _(b'cannot censor with version %d revlogs')
3163 _(b'cannot censor with version %d revlogs')
3162 % self._format_version
3164 % self._format_version
3163 )
3165 )
3164
3166
3165 censorrev = self.rev(censornode)
3167 censorrev = self.rev(censornode)
3166 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
3168 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
3167
3169
3168 if len(tombstone) > self.rawsize(censorrev):
3170 if len(tombstone) > self.rawsize(censorrev):
3169 raise error.Abort(
3171 raise error.Abort(
3170 _(b'censor tombstone must be no longer than censored data')
3172 _(b'censor tombstone must be no longer than censored data')
3171 )
3173 )
3172
3174
3173 # Rewriting the revlog in place is hard. Our strategy for censoring is
3175 # Rewriting the revlog in place is hard. Our strategy for censoring is
3174 # to create a new revlog, copy all revisions to it, then replace the
3176 # to create a new revlog, copy all revisions to it, then replace the
3175 # revlogs on transaction close.
3177 # revlogs on transaction close.
3176 #
3178 #
3177 # This is a bit dangerous. We could easily have a mismatch of state.
3179 # This is a bit dangerous. We could easily have a mismatch of state.
3178 newrl = revlog(
3180 newrl = revlog(
3179 self.opener,
3181 self.opener,
3180 target=self.target,
3182 target=self.target,
3181 radix=self.radix,
3183 radix=self.radix,
3182 postfix=b'tmpcensored',
3184 postfix=b'tmpcensored',
3183 censorable=True,
3185 censorable=True,
3184 )
3186 )
3185 newrl._format_version = self._format_version
3187 newrl._format_version = self._format_version
3186 newrl._format_flags = self._format_flags
3188 newrl._format_flags = self._format_flags
3187 newrl._generaldelta = self._generaldelta
3189 newrl._generaldelta = self._generaldelta
3188 newrl._parse_index = self._parse_index
3190 newrl._parse_index = self._parse_index
3189
3191
3190 for rev in self.revs():
3192 for rev in self.revs():
3191 node = self.node(rev)
3193 node = self.node(rev)
3192 p1, p2 = self.parents(node)
3194 p1, p2 = self.parents(node)
3193
3195
3194 if rev == censorrev:
3196 if rev == censorrev:
3195 newrl.addrawrevision(
3197 newrl.addrawrevision(
3196 tombstone,
3198 tombstone,
3197 tr,
3199 tr,
3198 self.linkrev(censorrev),
3200 self.linkrev(censorrev),
3199 p1,
3201 p1,
3200 p2,
3202 p2,
3201 censornode,
3203 censornode,
3202 REVIDX_ISCENSORED,
3204 REVIDX_ISCENSORED,
3203 )
3205 )
3204
3206
3205 if newrl.deltaparent(rev) != nullrev:
3207 if newrl.deltaparent(rev) != nullrev:
3206 raise error.Abort(
3208 raise error.Abort(
3207 _(
3209 _(
3208 b'censored revision stored as delta; '
3210 b'censored revision stored as delta; '
3209 b'cannot censor'
3211 b'cannot censor'
3210 ),
3212 ),
3211 hint=_(
3213 hint=_(
3212 b'censoring of revlogs is not '
3214 b'censoring of revlogs is not '
3213 b'fully implemented; please report '
3215 b'fully implemented; please report '
3214 b'this bug'
3216 b'this bug'
3215 ),
3217 ),
3216 )
3218 )
3217 continue
3219 continue
3218
3220
3219 if self.iscensored(rev):
3221 if self.iscensored(rev):
3220 if self.deltaparent(rev) != nullrev:
3222 if self.deltaparent(rev) != nullrev:
3221 raise error.Abort(
3223 raise error.Abort(
3222 _(
3224 _(
3223 b'cannot censor due to censored '
3225 b'cannot censor due to censored '
3224 b'revision having delta stored'
3226 b'revision having delta stored'
3225 )
3227 )
3226 )
3228 )
3227 rawtext = self._chunk(rev)
3229 rawtext = self._chunk(rev)
3228 else:
3230 else:
3229 rawtext = self.rawdata(rev)
3231 rawtext = self.rawdata(rev)
3230
3232
3231 newrl.addrawrevision(
3233 newrl.addrawrevision(
3232 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3234 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3233 )
3235 )
3234
3236
3235 tr.addbackup(self._indexfile, location=b'store')
3237 tr.addbackup(self._indexfile, location=b'store')
3236 if not self._inline:
3238 if not self._inline:
3237 tr.addbackup(self._datafile, location=b'store')
3239 tr.addbackup(self._datafile, location=b'store')
3238
3240
3239 self.opener.rename(newrl._indexfile, self._indexfile)
3241 self.opener.rename(newrl._indexfile, self._indexfile)
3240 if not self._inline:
3242 if not self._inline:
3241 self.opener.rename(newrl._datafile, self._datafile)
3243 self.opener.rename(newrl._datafile, self._datafile)
3242
3244
3243 self.clearcaches()
3245 self.clearcaches()
3244 self._loadindex()
3246 self._loadindex()
3245
3247
3246 def verifyintegrity(self, state):
3248 def verifyintegrity(self, state):
3247 """Verifies the integrity of the revlog.
3249 """Verifies the integrity of the revlog.
3248
3250
3249 Yields ``revlogproblem`` instances describing problems that are
3251 Yields ``revlogproblem`` instances describing problems that are
3250 found.
3252 found.
3251 """
3253 """
3252 dd, di = self.checksize()
3254 dd, di = self.checksize()
3253 if dd:
3255 if dd:
3254 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3256 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3255 if di:
3257 if di:
3256 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3258 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3257
3259
3258 version = self._format_version
3260 version = self._format_version
3259
3261
3260 # The verifier tells us what version revlog we should be.
3262 # The verifier tells us what version revlog we should be.
3261 if version != state[b'expectedversion']:
3263 if version != state[b'expectedversion']:
3262 yield revlogproblem(
3264 yield revlogproblem(
3263 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3265 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3264 % (self.display_id, version, state[b'expectedversion'])
3266 % (self.display_id, version, state[b'expectedversion'])
3265 )
3267 )
3266
3268
3267 state[b'skipread'] = set()
3269 state[b'skipread'] = set()
3268 state[b'safe_renamed'] = set()
3270 state[b'safe_renamed'] = set()
3269
3271
3270 for rev in self:
3272 for rev in self:
3271 node = self.node(rev)
3273 node = self.node(rev)
3272
3274
3273 # Verify contents. 4 cases to care about:
3275 # Verify contents. 4 cases to care about:
3274 #
3276 #
3275 # common: the most common case
3277 # common: the most common case
3276 # rename: with a rename
3278 # rename: with a rename
3277 # meta: file content starts with b'\1\n', the metadata
3279 # meta: file content starts with b'\1\n', the metadata
3278 # header defined in filelog.py, but without a rename
3280 # header defined in filelog.py, but without a rename
3279 # ext: content stored externally
3281 # ext: content stored externally
3280 #
3282 #
3281 # More formally, their differences are shown below:
3283 # More formally, their differences are shown below:
3282 #
3284 #
3283 # | common | rename | meta | ext
3285 # | common | rename | meta | ext
3284 # -------------------------------------------------------
3286 # -------------------------------------------------------
3285 # flags() | 0 | 0 | 0 | not 0
3287 # flags() | 0 | 0 | 0 | not 0
3286 # renamed() | False | True | False | ?
3288 # renamed() | False | True | False | ?
3287 # rawtext[0:2]=='\1\n'| False | True | True | ?
3289 # rawtext[0:2]=='\1\n'| False | True | True | ?
3288 #
3290 #
3289 # "rawtext" means the raw text stored in revlog data, which
3291 # "rawtext" means the raw text stored in revlog data, which
3290 # could be retrieved by "rawdata(rev)". "text"
3292 # could be retrieved by "rawdata(rev)". "text"
3291 # mentioned below is "revision(rev)".
3293 # mentioned below is "revision(rev)".
3292 #
3294 #
3293 # There are 3 different lengths stored physically:
3295 # There are 3 different lengths stored physically:
3294 # 1. L1: rawsize, stored in revlog index
3296 # 1. L1: rawsize, stored in revlog index
3295 # 2. L2: len(rawtext), stored in revlog data
3297 # 2. L2: len(rawtext), stored in revlog data
3296 # 3. L3: len(text), stored in revlog data if flags==0, or
3298 # 3. L3: len(text), stored in revlog data if flags==0, or
3297 # possibly somewhere else if flags!=0
3299 # possibly somewhere else if flags!=0
3298 #
3300 #
3299 # L1 should be equal to L2. L3 could be different from them.
3301 # L1 should be equal to L2. L3 could be different from them.
3300 # "text" may or may not affect commit hash depending on flag
3302 # "text" may or may not affect commit hash depending on flag
3301 # processors (see flagutil.addflagprocessor).
3303 # processors (see flagutil.addflagprocessor).
3302 #
3304 #
3303 # | common | rename | meta | ext
3305 # | common | rename | meta | ext
3304 # -------------------------------------------------
3306 # -------------------------------------------------
3305 # rawsize() | L1 | L1 | L1 | L1
3307 # rawsize() | L1 | L1 | L1 | L1
3306 # size() | L1 | L2-LM | L1(*) | L1 (?)
3308 # size() | L1 | L2-LM | L1(*) | L1 (?)
3307 # len(rawtext) | L2 | L2 | L2 | L2
3309 # len(rawtext) | L2 | L2 | L2 | L2
3308 # len(text) | L2 | L2 | L2 | L3
3310 # len(text) | L2 | L2 | L2 | L3
3309 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3311 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3310 #
3312 #
3311 # LM: length of metadata, depending on rawtext
3313 # LM: length of metadata, depending on rawtext
3312 # (*): not ideal, see comment in filelog.size
3314 # (*): not ideal, see comment in filelog.size
3313 # (?): could be "- len(meta)" if the resolved content has
3315 # (?): could be "- len(meta)" if the resolved content has
3314 # rename metadata
3316 # rename metadata
3315 #
3317 #
3316 # Checks needed to be done:
3318 # Checks needed to be done:
3317 # 1. length check: L1 == L2, in all cases.
3319 # 1. length check: L1 == L2, in all cases.
3318 # 2. hash check: depending on flag processor, we may need to
3320 # 2. hash check: depending on flag processor, we may need to
3319 # use either "text" (external), or "rawtext" (in revlog).
3321 # use either "text" (external), or "rawtext" (in revlog).
3320
3322
3321 try:
3323 try:
3322 skipflags = state.get(b'skipflags', 0)
3324 skipflags = state.get(b'skipflags', 0)
3323 if skipflags:
3325 if skipflags:
3324 skipflags &= self.flags(rev)
3326 skipflags &= self.flags(rev)
3325
3327
3326 _verify_revision(self, skipflags, state, node)
3328 _verify_revision(self, skipflags, state, node)
3327
3329
3328 l1 = self.rawsize(rev)
3330 l1 = self.rawsize(rev)
3329 l2 = len(self.rawdata(node))
3331 l2 = len(self.rawdata(node))
3330
3332
3331 if l1 != l2:
3333 if l1 != l2:
3332 yield revlogproblem(
3334 yield revlogproblem(
3333 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3335 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3334 node=node,
3336 node=node,
3335 )
3337 )
3336
3338
3337 except error.CensoredNodeError:
3339 except error.CensoredNodeError:
3338 if state[b'erroroncensored']:
3340 if state[b'erroroncensored']:
3339 yield revlogproblem(
3341 yield revlogproblem(
3340 error=_(b'censored file data'), node=node
3342 error=_(b'censored file data'), node=node
3341 )
3343 )
3342 state[b'skipread'].add(node)
3344 state[b'skipread'].add(node)
3343 except Exception as e:
3345 except Exception as e:
3344 yield revlogproblem(
3346 yield revlogproblem(
3345 error=_(b'unpacking %s: %s')
3347 error=_(b'unpacking %s: %s')
3346 % (short(node), stringutil.forcebytestr(e)),
3348 % (short(node), stringutil.forcebytestr(e)),
3347 node=node,
3349 node=node,
3348 )
3350 )
3349 state[b'skipread'].add(node)
3351 state[b'skipread'].add(node)
3350
3352
3351 def storageinfo(
3353 def storageinfo(
3352 self,
3354 self,
3353 exclusivefiles=False,
3355 exclusivefiles=False,
3354 sharedfiles=False,
3356 sharedfiles=False,
3355 revisionscount=False,
3357 revisionscount=False,
3356 trackedsize=False,
3358 trackedsize=False,
3357 storedsize=False,
3359 storedsize=False,
3358 ):
3360 ):
3359 d = {}
3361 d = {}
3360
3362
3361 if exclusivefiles:
3363 if exclusivefiles:
3362 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3364 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3363 if not self._inline:
3365 if not self._inline:
3364 d[b'exclusivefiles'].append((self.opener, self._datafile))
3366 d[b'exclusivefiles'].append((self.opener, self._datafile))
3365
3367
3366 if sharedfiles:
3368 if sharedfiles:
3367 d[b'sharedfiles'] = []
3369 d[b'sharedfiles'] = []
3368
3370
3369 if revisionscount:
3371 if revisionscount:
3370 d[b'revisionscount'] = len(self)
3372 d[b'revisionscount'] = len(self)
3371
3373
3372 if trackedsize:
3374 if trackedsize:
3373 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3375 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3374
3376
3375 if storedsize:
3377 if storedsize:
3376 d[b'storedsize'] = sum(
3378 d[b'storedsize'] = sum(
3377 self.opener.stat(path).st_size for path in self.files()
3379 self.opener.stat(path).st_size for path in self.files()
3378 )
3380 )
3379
3381
3380 return d
3382 return d
3381
3383
3382 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3384 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3383 if not self.hassidedata:
3385 if not self.hassidedata:
3384 return
3386 return
3385 # revlog formats with sidedata support does not support inline
3387 # revlog formats with sidedata support does not support inline
3386 assert not self._inline
3388 assert not self._inline
3387 if not helpers[1] and not helpers[2]:
3389 if not helpers[1] and not helpers[2]:
3388 # Nothing to generate or remove
3390 # Nothing to generate or remove
3389 return
3391 return
3390
3392
3391 new_entries = []
3393 new_entries = []
3392 # append the new sidedata
3394 # append the new sidedata
3393 with self._writing(transaction):
3395 with self._writing(transaction):
3394 ifh, dfh = self._writinghandles
3396 ifh, dfh = self._writinghandles
3395 if self._docket is not None:
3397 if self._docket is not None:
3396 dfh.seek(self._docket.data_end, os.SEEK_SET)
3398 dfh.seek(self._docket.data_end, os.SEEK_SET)
3397 else:
3399 else:
3398 dfh.seek(0, os.SEEK_END)
3400 dfh.seek(0, os.SEEK_END)
3399
3401
3400 current_offset = dfh.tell()
3402 current_offset = dfh.tell()
3401 for rev in range(startrev, endrev + 1):
3403 for rev in range(startrev, endrev + 1):
3402 entry = self.index[rev]
3404 entry = self.index[rev]
3403 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3405 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3404 store=self,
3406 store=self,
3405 sidedata_helpers=helpers,
3407 sidedata_helpers=helpers,
3406 sidedata={},
3408 sidedata={},
3407 rev=rev,
3409 rev=rev,
3408 )
3410 )
3409
3411
3410 serialized_sidedata = sidedatautil.serialize_sidedata(
3412 serialized_sidedata = sidedatautil.serialize_sidedata(
3411 new_sidedata
3413 new_sidedata
3412 )
3414 )
3413
3415
3414 sidedata_compression_mode = COMP_MODE_INLINE
3416 sidedata_compression_mode = COMP_MODE_INLINE
3415 if serialized_sidedata and self.hassidedata:
3417 if serialized_sidedata and self.hassidedata:
3416 sidedata_compression_mode = COMP_MODE_PLAIN
3418 sidedata_compression_mode = COMP_MODE_PLAIN
3417 h, comp_sidedata = self.compress(serialized_sidedata)
3419 h, comp_sidedata = self.compress(serialized_sidedata)
3418 if (
3420 if (
3419 h != b'u'
3421 h != b'u'
3420 and comp_sidedata[0] != b'\0'
3422 and comp_sidedata[0] != b'\0'
3421 and len(comp_sidedata) < len(serialized_sidedata)
3423 and len(comp_sidedata) < len(serialized_sidedata)
3422 ):
3424 ):
3423 assert not h
3425 assert not h
3424 if (
3426 if (
3425 comp_sidedata[0]
3427 comp_sidedata[0]
3426 == self._docket.default_compression_header
3428 == self._docket.default_compression_header
3427 ):
3429 ):
3428 sidedata_compression_mode = COMP_MODE_DEFAULT
3430 sidedata_compression_mode = COMP_MODE_DEFAULT
3429 serialized_sidedata = comp_sidedata
3431 serialized_sidedata = comp_sidedata
3430 else:
3432 else:
3431 sidedata_compression_mode = COMP_MODE_INLINE
3433 sidedata_compression_mode = COMP_MODE_INLINE
3432 serialized_sidedata = comp_sidedata
3434 serialized_sidedata = comp_sidedata
3433 if entry[8] != 0 or entry[9] != 0:
3435 if entry[8] != 0 or entry[9] != 0:
3434 # rewriting entries that already have sidedata is not
3436 # rewriting entries that already have sidedata is not
3435 # supported yet, because it introduces garbage data in the
3437 # supported yet, because it introduces garbage data in the
3436 # revlog.
3438 # revlog.
3437 msg = b"rewriting existing sidedata is not supported yet"
3439 msg = b"rewriting existing sidedata is not supported yet"
3438 raise error.Abort(msg)
3440 raise error.Abort(msg)
3439
3441
3440 # Apply (potential) flags to add and to remove after running
3442 # Apply (potential) flags to add and to remove after running
3441 # the sidedata helpers
3443 # the sidedata helpers
3442 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3444 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3443 entry_update = (
3445 entry_update = (
3444 current_offset,
3446 current_offset,
3445 len(serialized_sidedata),
3447 len(serialized_sidedata),
3446 new_offset_flags,
3448 new_offset_flags,
3447 sidedata_compression_mode,
3449 sidedata_compression_mode,
3448 )
3450 )
3449
3451
3450 # the sidedata computation might have move the file cursors around
3452 # the sidedata computation might have move the file cursors around
3451 dfh.seek(current_offset, os.SEEK_SET)
3453 dfh.seek(current_offset, os.SEEK_SET)
3452 dfh.write(serialized_sidedata)
3454 dfh.write(serialized_sidedata)
3453 new_entries.append(entry_update)
3455 new_entries.append(entry_update)
3454 current_offset += len(serialized_sidedata)
3456 current_offset += len(serialized_sidedata)
3455 if self._docket is not None:
3457 if self._docket is not None:
3456 self._docket.data_end = dfh.tell()
3458 self._docket.data_end = dfh.tell()
3457
3459
3458 # rewrite the new index entries
3460 # rewrite the new index entries
3459 ifh.seek(startrev * self.index.entry_size)
3461 ifh.seek(startrev * self.index.entry_size)
3460 for i, e in enumerate(new_entries):
3462 for i, e in enumerate(new_entries):
3461 rev = startrev + i
3463 rev = startrev + i
3462 self.index.replace_sidedata_info(rev, *e)
3464 self.index.replace_sidedata_info(rev, *e)
3463 packed = self.index.entry_binary(rev)
3465 packed = self.index.entry_binary(rev)
3464 if rev == 0 and self._docket is None:
3466 if rev == 0 and self._docket is None:
3465 header = self._format_flags | self._format_version
3467 header = self._format_flags | self._format_version
3466 header = self.index.pack_header(header)
3468 header = self.index.pack_header(header)
3467 packed = header + packed
3469 packed = header + packed
3468 ifh.write(packed)
3470 ifh.write(packed)
General Comments 0
You need to be logged in to leave comments. Login now